提交 d9c0e59f 编写于 作者: P Paolo Bonzini

Merge tag 'kvm_mips_4.11_1' of...

Merge tag 'kvm_mips_4.11_1' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/kvm-mips into HEAD

KVM: MIPS: GVA/GPA page tables, dirty logging, SYNC_MMU etc

Numerous MIPS KVM fixes, improvements, and features for 4.11, many of
which continue to pave the way for VZ support, the most interesting of
which are:

 - Add GVA->HPA page tables for T&E, to cache GVA mappings.
 - Generate fast-path TLB refill exception handler which loads host TLB
   entries from GVA page table, avoiding repeated guest memory
   translation and guest TLB lookups.
 - Use uaccess macros when T&E needs to access guest memory, which with
   GVA page tables and the Linux TLB refill handler improves robustness
   against TLB faults and fixes EVA hosts.
 - Use BadInstr/BadInstrP registers when available to obtain instruction
   encodings after a synchronous trap.
 - Add GPA->HPA page tables to replace the inflexible linear array,
   allowing for multiple sparsely arranged memory regions.
 - Properly implement dirty page logging.
 - Add KVM_CAP_SYNC_MMU support so that changes in GPA mappings become
   effective in guests even if they are already running, allowing for
   copy-on-write, KSM, idle page tracking, swapping, and guest memory
   ballooning.
 - Add KVM_CAP_READONLY_MEM support, so writes to specified memory
   regions are treated as MMIO.
 - Implement proper CP0_EBase support in T&E.
 - Expose a few more missing CP0 registers to userland.
 - Add KVM_CAP_NR_VCPUS and KVM_CAP_MAX_VCPUS support, and allow up to 8
   VCPUs to be created in a VM.
 - Various cleanups and dropping of dead and duplicated code.
......@@ -2061,6 +2061,8 @@ registers, find a list below:
MIPS | KVM_REG_MIPS_LO | 64
MIPS | KVM_REG_MIPS_PC | 64
MIPS | KVM_REG_MIPS_CP0_INDEX | 32
MIPS | KVM_REG_MIPS_CP0_ENTRYLO0 | 64
MIPS | KVM_REG_MIPS_CP0_ENTRYLO1 | 64
MIPS | KVM_REG_MIPS_CP0_CONTEXT | 64
MIPS | KVM_REG_MIPS_CP0_USERLOCAL | 64
MIPS | KVM_REG_MIPS_CP0_PAGEMASK | 32
......@@ -2071,9 +2073,11 @@ registers, find a list below:
MIPS | KVM_REG_MIPS_CP0_ENTRYHI | 64
MIPS | KVM_REG_MIPS_CP0_COMPARE | 32
MIPS | KVM_REG_MIPS_CP0_STATUS | 32
MIPS | KVM_REG_MIPS_CP0_INTCTL | 32
MIPS | KVM_REG_MIPS_CP0_CAUSE | 32
MIPS | KVM_REG_MIPS_CP0_EPC | 64
MIPS | KVM_REG_MIPS_CP0_PRID | 32
MIPS | KVM_REG_MIPS_CP0_EBASE | 64
MIPS | KVM_REG_MIPS_CP0_CONFIG | 32
MIPS | KVM_REG_MIPS_CP0_CONFIG1 | 32
MIPS | KVM_REG_MIPS_CP0_CONFIG2 | 32
......@@ -2148,6 +2152,12 @@ patterns depending on whether they're 32-bit or 64-bit registers:
0x7020 0000 0001 00 <reg:5> <sel:3> (32-bit)
0x7030 0000 0001 00 <reg:5> <sel:3> (64-bit)
Note: KVM_REG_MIPS_CP0_ENTRYLO0 and KVM_REG_MIPS_CP0_ENTRYLO1 are the MIPS64
versions of the EntryLo registers regardless of the word size of the host
hardware, host kernel, guest, and whether XPA is present in the guest, i.e.
with the RI and XI bits (if they exist) in bits 63 and 62 respectively, and
the PFNX field starting at bit 30.
MIPS KVM control registers (see above) have the following id bit patterns:
0x7030 0000 0002 <reg:16>
......
......@@ -43,6 +43,7 @@
#define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0)
#define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0)
#define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0)
#define KVM_REG_MIPS_CP0_INTCTL MIPS_CP0_32(12, 1)
#define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0)
#define KVM_REG_MIPS_CP0_EPC MIPS_CP0_64(14, 0)
#define KVM_REG_MIPS_CP0_PRID MIPS_CP0_32(15, 0)
......@@ -64,7 +65,7 @@
#define KVM_REG_MIPS_CP0_KSCRATCH6 MIPS_CP0_64(31, 7)
#define KVM_MAX_VCPUS 1
#define KVM_MAX_VCPUS 8
#define KVM_USER_MEM_SLOTS 8
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 0
......@@ -88,6 +89,7 @@
#define KVM_GUEST_KUSEG 0x00000000UL
#define KVM_GUEST_KSEG0 0x40000000UL
#define KVM_GUEST_KSEG1 0x40000000UL
#define KVM_GUEST_KSEG23 0x60000000UL
#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0xe0000000)
#define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff)
......@@ -104,7 +106,6 @@
#define KVM_GUEST_KSEG23ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG23)
#define KVM_INVALID_PAGE 0xdeadbeef
#define KVM_INVALID_INST 0xdeadbeef
#define KVM_INVALID_ADDR 0xdeadbeef
/*
......@@ -121,8 +122,6 @@ static inline bool kvm_is_error_hva(unsigned long addr)
return IS_ERR_VALUE(addr);
}
extern atomic_t kvm_mips_instance;
struct kvm_vm_stat {
ulong remote_tlb_flush;
};
......@@ -156,12 +155,8 @@ struct kvm_arch_memory_slot {
};
struct kvm_arch {
/* Guest GVA->HPA page table */
unsigned long *guest_pmap;
unsigned long guest_pmap_npages;
/* Wired host TLB used for the commpage */
int commpage_tlb;
/* Guest physical mm */
struct mm_struct gpa_mm;
};
#define N_MIPS_COPROC_REGS 32
......@@ -233,6 +228,7 @@ enum emulation_result {
EMULATE_FAIL, /* can't emulate this instruction */
EMULATE_WAIT, /* WAIT instruction */
EMULATE_PRIV_FAIL,
EMULATE_EXCEPT, /* A guest exception has been generated */
};
#define mips3_paddr_to_tlbpfn(x) \
......@@ -250,6 +246,7 @@ enum emulation_result {
#define TLB_ASID(x) ((x).tlb_hi & KVM_ENTRYHI_ASID)
#define TLB_LO_IDX(x, va) (((va) >> PAGE_SHIFT) & 1)
#define TLB_IS_VALID(x, va) ((x).tlb_lo[TLB_LO_IDX(x, va)] & ENTRYLO_V)
#define TLB_IS_DIRTY(x, va) ((x).tlb_lo[TLB_LO_IDX(x, va)] & ENTRYLO_D)
#define TLB_HI_VPN2_HIT(x, y) ((TLB_VPN2(x) & ~(x).tlb_mask) == \
((y) & VPN2_MASK & ~(x).tlb_mask))
#define TLB_HI_ASID_HIT(x, y) (TLB_IS_GLOBAL(x) || \
......@@ -261,6 +258,17 @@ struct kvm_mips_tlb {
long tlb_lo[2];
};
#define KVM_NR_MEM_OBJS 4
/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
*/
struct kvm_mmu_memory_cache {
int nobjs;
void *objects[KVM_NR_MEM_OBJS];
};
#define KVM_MIPS_AUX_FPU 0x1
#define KVM_MIPS_AUX_MSA 0x2
......@@ -275,6 +283,8 @@ struct kvm_vcpu_arch {
unsigned long host_cp0_badvaddr;
unsigned long host_cp0_epc;
u32 host_cp0_cause;
u32 host_cp0_badinstr;
u32 host_cp0_badinstrp;
/* GPRS */
unsigned long gprs[32];
......@@ -318,20 +328,18 @@ struct kvm_vcpu_arch {
/* Bitmask of pending exceptions to be cleared */
unsigned long pending_exceptions_clr;
/* Save/Restore the entryhi register when are are preempted/scheduled back in */
unsigned long preempt_entryhi;
/* S/W Based TLB for guest */
struct kvm_mips_tlb guest_tlb[KVM_MIPS_GUEST_TLB_SIZE];
/* Cached guest kernel/user ASIDs */
u32 guest_user_asid[NR_CPUS];
u32 guest_kernel_asid[NR_CPUS];
/* Guest kernel/user [partial] mm */
struct mm_struct guest_kernel_mm, guest_user_mm;
/* Guest ASID of last user mode execution */
unsigned int last_user_gasid;
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
int last_sched_cpu;
/* WAIT executed */
......@@ -339,14 +347,15 @@ struct kvm_vcpu_arch {
u8 fpu_enabled;
u8 msa_enabled;
u8 kscratch_enabled;
};
#define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0])
#define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val)
#define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0])
#define kvm_write_c0_guest_entrylo0(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO0][0] = (val))
#define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0])
#define kvm_write_c0_guest_entrylo1(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO1][0] = (val))
#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
......@@ -522,9 +531,17 @@ struct kvm_mips_callbacks {
int (*handle_msa_fpe)(struct kvm_vcpu *vcpu);
int (*handle_fpe)(struct kvm_vcpu *vcpu);
int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
int (*vm_init)(struct kvm *kvm);
int (*vcpu_init)(struct kvm_vcpu *vcpu);
void (*vcpu_uninit)(struct kvm_vcpu *vcpu);
int (*vcpu_setup)(struct kvm_vcpu *vcpu);
void (*flush_shadow_all)(struct kvm *kvm);
/*
* Must take care of flushing any cached GPA PTEs (e.g. guest entries in
* VZ root TLB, or T&E GVA page tables and corresponding root TLB
* mappings).
*/
void (*flush_shadow_memslot)(struct kvm *kvm,
const struct kvm_memory_slot *slot);
gpa_t (*gva_to_gpa)(gva_t gva);
void (*queue_timer_int)(struct kvm_vcpu *vcpu);
void (*dequeue_timer_int)(struct kvm_vcpu *vcpu);
......@@ -542,8 +559,10 @@ struct kvm_mips_callbacks {
const struct kvm_one_reg *reg, s64 *v);
int (*set_one_reg)(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg, s64 v);
int (*vcpu_get_regs)(struct kvm_vcpu *vcpu);
int (*vcpu_set_regs)(struct kvm_vcpu *vcpu);
int (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
int (*vcpu_put)(struct kvm_vcpu *vcpu, int cpu);
int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
void (*vcpu_reenter)(struct kvm_run *run, struct kvm_vcpu *vcpu);
};
extern struct kvm_mips_callbacks *kvm_mips_callbacks;
int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
......@@ -556,6 +575,7 @@ extern int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu);
/* Building of entry/exception code */
int kvm_mips_entry_setup(void);
void *kvm_mips_build_vcpu_run(void *addr);
void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler);
void *kvm_mips_build_exception(void *addr, void *handler);
void *kvm_mips_build_exit(void *addr);
......@@ -580,54 +600,125 @@ u32 kvm_get_user_asid(struct kvm_vcpu *vcpu);
u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu);
extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr,
struct kvm_vcpu *vcpu);
struct kvm_vcpu *vcpu,
bool write_fault);
extern int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
struct kvm_vcpu *vcpu);
extern int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
struct kvm_mips_tlb *tlb);
struct kvm_mips_tlb *tlb,
unsigned long gva,
bool write_fault);
extern enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_handle_tlbmod(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
struct kvm_vcpu *vcpu,
bool write_fault);
extern void kvm_mips_dump_host_tlbs(void);
extern void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu);
extern int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi,
unsigned long entrylo0,
unsigned long entrylo1,
int flush_dcache_mask);
extern void kvm_mips_flush_host_tlb(int skip_kseg0);
extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi);
extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi,
bool user, bool kernel);
extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu,
unsigned long entryhi);
extern int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr);
extern unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu,
unsigned long gva);
extern void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu,
struct kvm_vcpu *vcpu);
extern void kvm_local_flush_tlb_all(void);
extern void kvm_mips_alloc_new_mmu_context(struct kvm_vcpu *vcpu);
extern void kvm_mips_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
extern void kvm_mips_vcpu_put(struct kvm_vcpu *vcpu);
void kvm_mips_suspend_mm(int cpu);
void kvm_mips_resume_mm(int cpu);
/* MMU handling */
/**
* enum kvm_mips_flush - Types of MMU flushes.
* @KMF_USER: Flush guest user virtual memory mappings.
* Guest USeg only.
* @KMF_KERN: Flush guest kernel virtual memory mappings.
* Guest USeg and KSeg2/3.
* @KMF_GPA: Flush guest physical memory mappings.
* Also includes KSeg0 if KMF_KERN is set.
*/
enum kvm_mips_flush {
KMF_USER = 0x0,
KMF_KERN = 0x1,
KMF_GPA = 0x2,
};
void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags);
bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn);
int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn);
pgd_t *kvm_pgd_alloc(void);
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr,
bool user);
void kvm_trap_emul_gva_lockless_begin(struct kvm_vcpu *vcpu);
void kvm_trap_emul_gva_lockless_end(struct kvm_vcpu *vcpu);
enum kvm_mips_fault_result {
KVM_MIPS_MAPPED = 0,
KVM_MIPS_GVA,
KVM_MIPS_GPA,
KVM_MIPS_TLB,
KVM_MIPS_TLBINV,
KVM_MIPS_TLBMOD,
};
enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
unsigned long gva,
bool write);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
unsigned long address)
{
}
/* Emulation */
u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu);
int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause);
int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
/**
* kvm_is_ifetch_fault() - Find whether a TLBL exception is due to ifetch fault.
* @vcpu: Virtual CPU.
*
* Returns: Whether the TLBL exception was likely due to an instruction
* fetch fault rather than a data load fault.
*/
static inline bool kvm_is_ifetch_fault(struct kvm_vcpu_arch *vcpu)
{
unsigned long badvaddr = vcpu->host_cp0_badvaddr;
unsigned long epc = msk_isa16_mode(vcpu->pc);
u32 cause = vcpu->host_cp0_cause;
if (epc == badvaddr)
return true;
/*
* Branches may be 32-bit or 16-bit instructions.
* This isn't exact, but we don't really support MIPS16 or microMIPS yet
* in KVM anyway.
*/
if ((cause & CAUSEF_BD) && badvaddr - epc <= 4)
return true;
return false;
}
extern enum emulation_result kvm_mips_emulate_inst(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_syscall(u32 cause,
u32 *opc,
struct kvm_run *run,
......@@ -761,10 +852,6 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
......
......@@ -29,9 +29,11 @@ do { \
} \
} while (0)
extern void tlbmiss_handler_setup_pgd(unsigned long);
/* Note: This is also implemented with uasm in arch/mips/kvm/entry.c */
#define TLBMISS_HANDLER_SETUP_PGD(pgd) \
do { \
extern void tlbmiss_handler_setup_pgd(unsigned long); \
tlbmiss_handler_setup_pgd((unsigned long)(pgd)); \
htw_set_pwbase((unsigned long)pgd); \
} while (0)
......@@ -97,17 +99,12 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
static inline void
get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
{
extern void kvm_local_flush_tlb_all(void);
unsigned long asid = asid_cache(cpu);
if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) {
if (cpu_has_vtag_icache)
flush_icache_all();
#ifdef CONFIG_KVM
kvm_local_flush_tlb_all(); /* start new asid cycle */
#else
local_flush_tlb_all(); /* start new asid cycle */
#endif
if (!asid) /* fix version if needed */
asid = asid_first_version(cpu);
}
......
......@@ -43,21 +43,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
* Initialize a new pgd / pmd table with invalid pointers.
*/
extern void pgd_init(unsigned long page);
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *ret, *init;
ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ORDER);
if (ret) {
init = pgd_offset(&init_mm, 0UL);
pgd_init((unsigned long)ret);
memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
}
return ret;
}
extern pgd_t *pgd_alloc(struct mm_struct *mm);
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
......
......@@ -147,49 +147,64 @@ static inline void flush_scache_line(unsigned long addr)
}
#define protected_cache_op(op,addr) \
({ \
int __err = 0; \
__asm__ __volatile__( \
" .set push \n" \
" .set noreorder \n" \
" .set "MIPS_ISA_ARCH_LEVEL" \n" \
"1: cache %0, (%1) \n" \
"1: cache %1, (%2) \n" \
"2: .set pop \n" \
" .section .fixup,\"ax\" \n" \
"3: li %0, %3 \n" \
" j 2b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
" "STR(PTR)" 1b, 2b \n" \
" "STR(PTR)" 1b, 3b \n" \
" .previous" \
: \
: "i" (op), "r" (addr))
: "+r" (__err) \
: "i" (op), "r" (addr), "i" (-EFAULT)); \
__err; \
})
#define protected_cachee_op(op,addr) \
({ \
int __err = 0; \
__asm__ __volatile__( \
" .set push \n" \
" .set noreorder \n" \
" .set mips0 \n" \
" .set eva \n" \
"1: cachee %0, (%1) \n" \
"1: cachee %1, (%2) \n" \
"2: .set pop \n" \
" .section .fixup,\"ax\" \n" \
"3: li %0, %3 \n" \
" j 2b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
" "STR(PTR)" 1b, 2b \n" \
" "STR(PTR)" 1b, 3b \n" \
" .previous" \
: \
: "i" (op), "r" (addr))
: "+r" (__err) \
: "i" (op), "r" (addr), "i" (-EFAULT)); \
__err; \
})
/*
* The next two are for badland addresses like signal trampolines.
*/
static inline void protected_flush_icache_line(unsigned long addr)
static inline int protected_flush_icache_line(unsigned long addr)
{
switch (boot_cpu_type()) {
case CPU_LOONGSON2:
protected_cache_op(Hit_Invalidate_I_Loongson2, addr);
break;
return protected_cache_op(Hit_Invalidate_I_Loongson2, addr);
default:
#ifdef CONFIG_EVA
protected_cachee_op(Hit_Invalidate_I, addr);
return protected_cachee_op(Hit_Invalidate_I, addr);
#else
protected_cache_op(Hit_Invalidate_I, addr);
return protected_cache_op(Hit_Invalidate_I, addr);
#endif
break;
}
}
......@@ -199,21 +214,21 @@ static inline void protected_flush_icache_line(unsigned long addr)
* caches. We're talking about one cacheline unnecessarily getting invalidated
* here so the penalty isn't overly hard.
*/
static inline void protected_writeback_dcache_line(unsigned long addr)
static inline int protected_writeback_dcache_line(unsigned long addr)
{
#ifdef CONFIG_EVA
protected_cachee_op(Hit_Writeback_Inv_D, addr);
return protected_cachee_op(Hit_Writeback_Inv_D, addr);
#else
protected_cache_op(Hit_Writeback_Inv_D, addr);
return protected_cache_op(Hit_Writeback_Inv_D, addr);
#endif
}
static inline void protected_writeback_scache_line(unsigned long addr)
static inline int protected_writeback_scache_line(unsigned long addr)
{
#ifdef CONFIG_EVA
protected_cachee_op(Hit_Writeback_Inv_SD, addr);
return protected_cachee_op(Hit_Writeback_Inv_SD, addr);
#else
protected_cache_op(Hit_Writeback_Inv_SD, addr);
return protected_cache_op(Hit_Writeback_Inv_SD, addr);
#endif
}
......
#ifndef __ASM_TLBEX_H
#define __ASM_TLBEX_H
#include <asm/uasm.h>
/*
* Write random or indexed TLB entry, and care about the hazards from
* the preceding mtc0 and for the following eret.
*/
enum tlb_write_entry {
tlb_random,
tlb_indexed
};
extern int pgd_reg;
void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
unsigned int tmp, unsigned int ptr);
void build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr);
void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr);
void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep);
void build_tlb_write_entry(u32 **p, struct uasm_label **l,
struct uasm_reloc **r,
enum tlb_write_entry wmode);
#endif /* __ASM_TLBEX_H */
......@@ -9,6 +9,9 @@
* Copyright (C) 2012, 2013 MIPS Technologies, Inc. All rights reserved.
*/
#ifndef __ASM_UASM_H
#define __ASM_UASM_H
#include <linux/types.h>
#ifdef CONFIG_EXPORT_UASM
......@@ -309,3 +312,5 @@ void uasm_il_bltz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
void uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1,
unsigned int reg2, int lid);
void uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
#endif /* __ASM_UASM_H */
......@@ -19,6 +19,8 @@
* Some parts derived from the x86 version of this file.
*/
#define __KVM_HAVE_READONLY_MEM
/*
* for KVM_GET_REGS and KVM_SET_REGS
*
......
......@@ -20,7 +20,9 @@ config KVM
select EXPORT_UASM
select PREEMPT_NOTIFIERS
select ANON_INODES
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_MMIO
select MMU_NOTIFIER
select SRCU
---help---
Support for hosting Guest kernels.
......
......@@ -13,6 +13,7 @@
#include <linux/err.h>
#include <linux/highmem.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/bootmem.h>
......@@ -29,29 +30,38 @@
static int kvm_mips_trans_replace(struct kvm_vcpu *vcpu, u32 *opc,
union mips_instruction replace)
{
unsigned long paddr, flags;
void *vaddr;
if (KVM_GUEST_KSEGX((unsigned long)opc) == KVM_GUEST_KSEG0) {
paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu,
(unsigned long)opc);
vaddr = kmap_atomic(pfn_to_page(PHYS_PFN(paddr)));
vaddr += paddr & ~PAGE_MASK;
memcpy(vaddr, (void *)&replace, sizeof(u32));
local_flush_icache_range((unsigned long)vaddr,
(unsigned long)vaddr + 32);
kunmap_atomic(vaddr);
} else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
local_irq_save(flags);
memcpy((void *)opc, (void *)&replace, sizeof(u32));
__local_flush_icache_user_range((unsigned long)opc,
(unsigned long)opc + 32);
local_irq_restore(flags);
} else {
kvm_err("%s: Invalid address: %p\n", __func__, opc);
unsigned long vaddr = (unsigned long)opc;
int err;
retry:
/* The GVA page table is still active so use the Linux TLB handlers */
kvm_trap_emul_gva_lockless_begin(vcpu);
err = put_user(replace.word, opc);
kvm_trap_emul_gva_lockless_end(vcpu);
if (unlikely(err)) {
/*
* We write protect clean pages in GVA page table so normal
* Linux TLB mod handler doesn't silently dirty the page.
* Its also possible we raced with a GVA invalidation.
* Try to force the page to become dirty.
*/
err = kvm_trap_emul_gva_fault(vcpu, vaddr, true);
if (unlikely(err)) {
kvm_info("%s: Address unwriteable: %p\n",
__func__, opc);
return -EFAULT;
}
/*
* Try again. This will likely trigger a TLB refill, which will
* fetch the new dirty entry from the GVA page table, which
* should then succeed.
*/
goto retry;
}
__local_flush_icache_user_range(vaddr, vaddr + 4);
return 0;
}
......
......@@ -38,23 +38,25 @@
* Compute the return address and do emulate branch simulation, if required.
* This function should be called only in branch delay slot active.
*/
unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
unsigned long instpc)
static int kvm_compute_return_epc(struct kvm_vcpu *vcpu, unsigned long instpc,
unsigned long *out)
{
unsigned int dspcontrol;
union mips_instruction insn;
struct kvm_vcpu_arch *arch = &vcpu->arch;
long epc = instpc;
long nextpc = KVM_INVALID_INST;
long nextpc;
int err;
if (epc & 3)
goto unaligned;
if (epc & 3) {
kvm_err("%s: unaligned epc\n", __func__);
return -EINVAL;
}
/* Read the instruction */
insn.word = kvm_get_inst((u32 *) epc, vcpu);
if (insn.word == KVM_INVALID_INST)
return KVM_INVALID_INST;
err = kvm_get_badinstrp((u32 *)epc, vcpu, &insn.word);
if (err)
return err;
switch (insn.i_format.opcode) {
/* jr and jalr are in r_format format. */
......@@ -66,6 +68,8 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
case jr_op:
nextpc = arch->gprs[insn.r_format.rs];
break;
default:
return -EINVAL;
}
break;
......@@ -114,8 +118,11 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
nextpc = epc;
break;
case bposge32_op:
if (!cpu_has_dsp)
goto sigill;
if (!cpu_has_dsp) {
kvm_err("%s: DSP branch but not DSP ASE\n",
__func__);
return -EINVAL;
}
dspcontrol = rddsp(0x01);
......@@ -125,6 +132,8 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
epc += 8;
nextpc = epc;
break;
default:
return -EINVAL;
}
break;
......@@ -189,7 +198,7 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
/* And now the FPA/cp1 branch instructions. */
case cop1_op:
kvm_err("%s: unsupported cop1_op\n", __func__);
break;
return -EINVAL;
#ifdef CONFIG_CPU_MIPSR6
/* R6 added the following compact branches with forbidden slots */
......@@ -198,19 +207,19 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
/* only rt == 0 isn't compact branch */
if (insn.i_format.rt != 0)
goto compact_branch;
break;
return -EINVAL;
case pop10_op:
case pop30_op:
/* only rs == rt == 0 is reserved, rest are compact branches */
if (insn.i_format.rs != 0 || insn.i_format.rt != 0)
goto compact_branch;
break;
return -EINVAL;
case pop66_op:
case pop76_op:
/* only rs == 0 isn't compact branch */
if (insn.i_format.rs != 0)
goto compact_branch;
break;
return -EINVAL;
compact_branch:
/*
* If we've hit an exception on the forbidden slot, then
......@@ -221,42 +230,74 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
break;
#else
compact_branch:
/* Compact branches not supported before R6 */
break;
/* Fall through - Compact branches not supported before R6 */
#endif
default:
return -EINVAL;
}
return nextpc;
unaligned:
kvm_err("%s: unaligned epc\n", __func__);
return nextpc;
sigill:
kvm_err("%s: DSP branch but not DSP ASE\n", __func__);
return nextpc;
*out = nextpc;
return 0;
}
enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause)
{
unsigned long branch_pc;
enum emulation_result er = EMULATE_DONE;
int err;
if (cause & CAUSEF_BD) {
branch_pc = kvm_compute_return_epc(vcpu, vcpu->arch.pc);
if (branch_pc == KVM_INVALID_INST) {
er = EMULATE_FAIL;
err = kvm_compute_return_epc(vcpu, vcpu->arch.pc,
&vcpu->arch.pc);
if (err)
return EMULATE_FAIL;
} else {
vcpu->arch.pc = branch_pc;
kvm_debug("BD update_pc(): New PC: %#lx\n",
vcpu->arch.pc);
}
} else
vcpu->arch.pc += 4;
}
kvm_debug("update_pc(): New PC: %#lx\n", vcpu->arch.pc);
return er;
return EMULATE_DONE;
}
/**
* kvm_get_badinstr() - Get bad instruction encoding.
* @opc: Guest pointer to faulting instruction.
* @vcpu: KVM VCPU information.
*
* Gets the instruction encoding of the faulting instruction, using the saved
* BadInstr register value if it exists, otherwise falling back to reading guest
* memory at @opc.
*
* Returns: The instruction encoding of the faulting instruction.
*/
int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
{
if (cpu_has_badinstr) {
*out = vcpu->arch.host_cp0_badinstr;
return 0;
} else {
return kvm_get_inst(opc, vcpu, out);
}
}
/**
* kvm_get_badinstrp() - Get bad prior instruction encoding.
* @opc: Guest pointer to prior faulting instruction.
* @vcpu: KVM VCPU information.
*
* Gets the instruction encoding of the prior faulting instruction (the branch
* containing the delay slot which faulted), using the saved BadInstrP register
* value if it exists, otherwise falling back to reading guest memory at @opc.
*
* Returns: The instruction encoding of the prior faulting instruction.
*/
int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
{
if (cpu_has_badinstrp) {
*out = vcpu->arch.host_cp0_badinstrp;
return 0;
} else {
return kvm_get_inst(opc, vcpu, out);
}
}
/**
......@@ -856,22 +897,30 @@ enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu)
static void kvm_mips_invalidate_guest_tlb(struct kvm_vcpu *vcpu,
struct kvm_mips_tlb *tlb)
{
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
int cpu, i;
bool user;
/* No need to flush for entries which are already invalid */
if (!((tlb->tlb_lo[0] | tlb->tlb_lo[1]) & ENTRYLO_V))
return;
/* Don't touch host kernel page tables or TLB mappings */
if ((unsigned long)tlb->tlb_hi > 0x7fffffff)
return;
/* User address space doesn't need flushing for KSeg2/3 changes */
user = tlb->tlb_hi < KVM_GUEST_KSEG0;
preempt_disable();
/* Invalidate page table entries */
kvm_trap_emul_invalidate_gva(vcpu, tlb->tlb_hi & VPN2_MASK, user);
/*
* Probe the shadow host TLB for the entry being overwritten, if one
* matches, invalidate it
*/
kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi);
kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi, user, true);
/* Invalidate the whole ASID on other CPUs */
cpu = smp_processor_id();
......@@ -879,8 +928,8 @@ static void kvm_mips_invalidate_guest_tlb(struct kvm_vcpu *vcpu,
if (i == cpu)
continue;
if (user)
vcpu->arch.guest_user_asid[i] = 0;
vcpu->arch.guest_kernel_asid[i] = 0;
cpu_context(i, user_mm) = 0;
cpu_context(i, kern_mm) = 0;
}
preempt_enable();
......@@ -1017,7 +1066,7 @@ unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu)
unsigned int mask = MIPS_CONF_M;
/* KScrExist */
mask |= (unsigned int)vcpu->arch.kscratch_enabled << 16;
mask |= 0xfc << MIPS_CONF4_KSCREXIST_SHIFT;
return mask;
}
......@@ -1056,6 +1105,7 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
enum emulation_result er = EMULATE_DONE;
u32 rt, rd, sel;
unsigned long curr_pc;
......@@ -1150,14 +1200,13 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
er = EMULATE_FAIL;
break;
}
#define C0_EBASE_CORE_MASK 0xff
if ((rd == MIPS_CP0_PRID) && (sel == 1)) {
/* Preserve CORE number */
kvm_change_c0_guest_ebase(cop0,
~(C0_EBASE_CORE_MASK),
/*
* Preserve core number, and keep the exception
* base in guest KSeg0.
*/
kvm_change_c0_guest_ebase(cop0, 0x1ffff000,
vcpu->arch.gprs[rt]);
kvm_err("MTCz, cop0->reg[EBASE]: %#lx\n",
kvm_read_c0_guest_ebase(cop0));
} else if (rd == MIPS_CP0_TLB_HI && sel == 0) {
u32 nasid =
vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID;
......@@ -1168,6 +1217,17 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
& KVM_ENTRYHI_ASID,
nasid);
/*
* Flush entries from the GVA page
* tables.
* Guest user page table will get
* flushed lazily on re-entry to guest
* user if the guest ASID actually
* changes.
*/
kvm_mips_flush_gva_pt(kern_mm->pgd,
KMF_KERN);
/*
* Regenerate/invalidate kernel MMU
* context.
......@@ -1178,13 +1238,10 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
*/
preempt_disable();
cpu = smp_processor_id();
kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm,
cpu, vcpu);
vcpu->arch.guest_kernel_asid[cpu] =
vcpu->arch.guest_kernel_mm.context.asid[cpu];
get_new_mmu_context(kern_mm, cpu);
for_each_possible_cpu(i)
if (i != cpu)
vcpu->arch.guest_kernel_asid[i] = 0;
cpu_context(i, kern_mm) = 0;
preempt_enable();
}
kvm_write_c0_guest_entryhi(cop0,
......@@ -1639,12 +1696,56 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
return er;
}
static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long),
unsigned long curr_pc,
unsigned long addr,
struct kvm_run *run,
struct kvm_vcpu *vcpu,
u32 cause)
{
int err;
for (;;) {
/* Carefully attempt the cache operation */
kvm_trap_emul_gva_lockless_begin(vcpu);
err = fn(addr);
kvm_trap_emul_gva_lockless_end(vcpu);
if (likely(!err))
return EMULATE_DONE;
/*
* Try to handle the fault and retry, maybe we just raced with a
* GVA invalidation.
*/
switch (kvm_trap_emul_gva_fault(vcpu, addr, false)) {
case KVM_MIPS_GVA:
case KVM_MIPS_GPA:
/* bad virtual or physical address */
return EMULATE_FAIL;
case KVM_MIPS_TLB:
/* no matching guest TLB */
vcpu->arch.host_cp0_badvaddr = addr;
vcpu->arch.pc = curr_pc;
kvm_mips_emulate_tlbmiss_ld(cause, NULL, run, vcpu);
return EMULATE_EXCEPT;
case KVM_MIPS_TLBINV:
/* invalid matching guest TLB */
vcpu->arch.host_cp0_badvaddr = addr;
vcpu->arch.pc = curr_pc;
kvm_mips_emulate_tlbinv_ld(cause, NULL, run, vcpu);
return EMULATE_EXCEPT;
default:
break;
};
}
}
enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
u32 *opc, u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
enum emulation_result er = EMULATE_DONE;
u32 cache, op_inst, op, base;
s16 offset;
......@@ -1701,80 +1802,16 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
goto done;
}
preempt_disable();
if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) {
if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 &&
kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) {
kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n",
__func__, va, vcpu, read_c0_entryhi());
er = EMULATE_FAIL;
preempt_enable();
goto done;
}
} else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) ||
KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) {
int index;
/* If an entry already exists then skip */
if (kvm_mips_host_tlb_lookup(vcpu, va) >= 0)
goto skip_fault;
/*
* If address not in the guest TLB, then give the guest a fault,
* the resulting handler will do the right thing
*/
index = kvm_mips_guest_tlb_lookup(vcpu, (va & VPN2_MASK) |
(kvm_read_c0_guest_entryhi
(cop0) & KVM_ENTRYHI_ASID));
if (index < 0) {
vcpu->arch.host_cp0_badvaddr = va;
vcpu->arch.pc = curr_pc;
er = kvm_mips_emulate_tlbmiss_ld(cause, NULL, run,
vcpu);
preempt_enable();
goto dont_update_pc;
} else {
struct kvm_mips_tlb *tlb = &vcpu->arch.guest_tlb[index];
/*
* Check if the entry is valid, if not then setup a TLB
* invalid exception to the guest
*/
if (!TLB_IS_VALID(*tlb, va)) {
vcpu->arch.host_cp0_badvaddr = va;
vcpu->arch.pc = curr_pc;
er = kvm_mips_emulate_tlbinv_ld(cause, NULL,
run, vcpu);
preempt_enable();
goto dont_update_pc;
}
/* XXXKYMA: Only a subset of cache ops are supported, used by Linux */
if (op_inst == Hit_Writeback_Inv_D || op_inst == Hit_Invalidate_D) {
/*
* We fault an entry from the guest tlb to the
* shadow host TLB
* Perform the dcache part of icache synchronisation on the
* guest's behalf.
*/
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
__func__, va, index, vcpu,
read_c0_entryhi());
er = EMULATE_FAIL;
preempt_enable();
er = kvm_mips_guest_cache_op(protected_writeback_dcache_line,
curr_pc, va, run, vcpu, cause);
if (er != EMULATE_DONE)
goto done;
}
}
} else {
kvm_err("INVALID CACHE INDEX/ADDRESS (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
cache, op, base, arch->gprs[base], offset);
er = EMULATE_FAIL;
preempt_enable();
goto done;
}
skip_fault:
/* XXXKYMA: Only a subset of cache ops are supported, used by Linux */
if (op_inst == Hit_Writeback_Inv_D || op_inst == Hit_Invalidate_D) {
flush_dcache_line(va);
#ifdef CONFIG_KVM_MIPS_DYN_TRANS
/*
* Replace the CACHE instruction, with a SYNCI, not the same,
......@@ -1783,8 +1820,15 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
kvm_mips_trans_cache_va(inst, opc, vcpu);
#endif
} else if (op_inst == Hit_Invalidate_I) {
flush_dcache_line(va);
flush_icache_line(va);
/* Perform the icache synchronisation on the guest's behalf */
er = kvm_mips_guest_cache_op(protected_writeback_dcache_line,
curr_pc, va, run, vcpu, cause);
if (er != EMULATE_DONE)
goto done;
er = kvm_mips_guest_cache_op(protected_flush_icache_line,
curr_pc, va, run, vcpu, cause);
if (er != EMULATE_DONE)
goto done;
#ifdef CONFIG_KVM_MIPS_DYN_TRANS
/* Replace the CACHE instruction, with a SYNCI */
......@@ -1796,17 +1840,13 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
er = EMULATE_FAIL;
}
preempt_enable();
done:
/* Rollback PC only if emulation was unsuccessful */
if (er == EMULATE_FAIL)
vcpu->arch.pc = curr_pc;
dont_update_pc:
/*
* This is for exceptions whose emulation updates the PC, so do not
* overwrite the PC under any circumstances
*/
/* Guest exception needs guest to resume */
if (er == EMULATE_EXCEPT)
er = EMULATE_DONE;
return er;
}
......@@ -1817,12 +1857,14 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc,
{
union mips_instruction inst;
enum emulation_result er = EMULATE_DONE;
int err;
/* Fetch the instruction. */
if (cause & CAUSEF_BD)
opc += 1;
inst.word = kvm_get_inst(opc, vcpu);
err = kvm_get_badinstr(opc, vcpu, &inst.word);
if (err)
return EMULATE_FAIL;
switch (inst.r_format.opcode) {
case cop0_op:
......@@ -1874,6 +1916,22 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc,
return er;
}
/**
* kvm_mips_guest_exception_base() - Find guest exception vector base address.
*
* Returns: The base address of the current guest exception vector, taking
* both Guest.CP0_Status.BEV and Guest.CP0_EBase into account.
*/
long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
if (kvm_read_c0_guest_status(cop0) & ST0_BEV)
return KVM_GUEST_CKSEG1ADDR(0x1fc00200);
else
return kvm_read_c0_guest_ebase(cop0) & MIPS_EBASE_BASE;
}
enum emulation_result kvm_mips_emulate_syscall(u32 cause,
u32 *opc,
struct kvm_run *run,
......@@ -1899,7 +1957,7 @@ enum emulation_result kvm_mips_emulate_syscall(u32 cause,
(EXCCODE_SYS << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
} else {
kvm_err("Trying to deliver SYSCALL when EXL is already set\n");
......@@ -1933,13 +1991,13 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause,
arch->pc);
/* set pc to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x0;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x0;
} else {
kvm_debug("[EXL == 1] delivering TLB MISS @ pc %#lx\n",
arch->pc);
arch->pc = KVM_GUEST_KSEG0 + 0x180;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
}
kvm_change_c0_guest_cause(cop0, (0xff),
......@@ -1949,8 +2007,6 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause,
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
/* XXXKYMA: is the context register used by linux??? */
kvm_write_c0_guest_entryhi(cop0, entryhi);
/* Blow away the shadow host TLBs */
kvm_mips_flush_host_tlb(1);
return EMULATE_DONE;
}
......@@ -1978,16 +2034,14 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause,
kvm_debug("[EXL == 0] delivering TLB INV @ pc %#lx\n",
arch->pc);
/* set pc to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
} else {
kvm_debug("[EXL == 1] delivering TLB MISS @ pc %#lx\n",
arch->pc);
arch->pc = KVM_GUEST_KSEG0 + 0x180;
}
/* set pc to the exception entry point */
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
kvm_change_c0_guest_cause(cop0, (0xff),
(EXCCODE_TLBL << CAUSEB_EXCCODE));
......@@ -1995,8 +2049,6 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause,
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
/* XXXKYMA: is the context register used by linux??? */
kvm_write_c0_guest_entryhi(cop0, entryhi);
/* Blow away the shadow host TLBs */
kvm_mips_flush_host_tlb(1);
return EMULATE_DONE;
}
......@@ -2025,11 +2077,11 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause,
arch->pc);
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x0;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x0;
} else {
kvm_debug("[EXL == 1] Delivering TLB MISS @ pc %#lx\n",
arch->pc);
arch->pc = KVM_GUEST_KSEG0 + 0x180;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
}
kvm_change_c0_guest_cause(cop0, (0xff),
......@@ -2039,8 +2091,6 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause,
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
/* XXXKYMA: is the context register used by linux??? */
kvm_write_c0_guest_entryhi(cop0, entryhi);
/* Blow away the shadow host TLBs */
kvm_mips_flush_host_tlb(1);
return EMULATE_DONE;
}
......@@ -2067,15 +2117,14 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause,
kvm_debug("[EXL == 0] Delivering TLB MISS @ pc %#lx\n",
arch->pc);
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
} else {
kvm_debug("[EXL == 1] Delivering TLB MISS @ pc %#lx\n",
arch->pc);
arch->pc = KVM_GUEST_KSEG0 + 0x180;
}
/* Set PC to the exception entry point */
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
kvm_change_c0_guest_cause(cop0, (0xff),
(EXCCODE_TLBS << CAUSEB_EXCCODE));
......@@ -2083,41 +2132,10 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause,
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
/* XXXKYMA: is the context register used by linux??? */
kvm_write_c0_guest_entryhi(cop0, entryhi);
/* Blow away the shadow host TLBs */
kvm_mips_flush_host_tlb(1);
return EMULATE_DONE;
}
/* TLBMOD: store into address matching TLB with Dirty bit off */
enum emulation_result kvm_mips_handle_tlbmod(u32 cause, u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
enum emulation_result er = EMULATE_DONE;
#ifdef DEBUG
struct mips_coproc *cop0 = vcpu->arch.cop0;
unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) |
(kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID);
int index;
/* If address not in the guest TLB, then we are in trouble */
index = kvm_mips_guest_tlb_lookup(vcpu, entryhi);
if (index < 0) {
/* XXXKYMA Invalidate and retry */
kvm_mips_host_tlb_inv(vcpu, vcpu->arch.host_cp0_badvaddr);
kvm_err("%s: host got TLBMOD for %#lx but entry not present in Guest TLB\n",
__func__, entryhi);
kvm_mips_dump_guest_tlbs(vcpu);
kvm_mips_dump_host_tlbs();
return EMULATE_FAIL;
}
#endif
er = kvm_mips_emulate_tlbmod(cause, opc, run, vcpu);
return er;
}
enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
u32 *opc,
struct kvm_run *run,
......@@ -2140,14 +2158,13 @@ enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
kvm_debug("[EXL == 0] Delivering TLB MOD @ pc %#lx\n",
arch->pc);
arch->pc = KVM_GUEST_KSEG0 + 0x180;
} else {
kvm_debug("[EXL == 1] Delivering TLB MOD @ pc %#lx\n",
arch->pc);
arch->pc = KVM_GUEST_KSEG0 + 0x180;
}
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
kvm_change_c0_guest_cause(cop0, (0xff),
(EXCCODE_MOD << CAUSEB_EXCCODE));
......@@ -2155,8 +2172,6 @@ enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
/* XXXKYMA: is the context register used by linux??? */
kvm_write_c0_guest_entryhi(cop0, entryhi);
/* Blow away the shadow host TLBs */
kvm_mips_flush_host_tlb(1);
return EMULATE_DONE;
}
......@@ -2181,7 +2196,7 @@ enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause,
}
arch->pc = KVM_GUEST_KSEG0 + 0x180;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
kvm_change_c0_guest_cause(cop0, (0xff),
(EXCCODE_CPU << CAUSEB_EXCCODE));
......@@ -2215,7 +2230,7 @@ enum emulation_result kvm_mips_emulate_ri_exc(u32 cause,
(EXCCODE_RI << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
} else {
kvm_err("Trying to deliver RI when EXL is already set\n");
......@@ -2250,7 +2265,7 @@ enum emulation_result kvm_mips_emulate_bp_exc(u32 cause,
(EXCCODE_BP << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
} else {
kvm_err("Trying to deliver BP when EXL is already set\n");
......@@ -2285,7 +2300,7 @@ enum emulation_result kvm_mips_emulate_trap_exc(u32 cause,
(EXCCODE_TR << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
} else {
kvm_err("Trying to deliver TRAP when EXL is already set\n");
......@@ -2320,7 +2335,7 @@ enum emulation_result kvm_mips_emulate_msafpe_exc(u32 cause,
(EXCCODE_MSAFPE << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
} else {
kvm_err("Trying to deliver MSAFPE when EXL is already set\n");
......@@ -2355,7 +2370,7 @@ enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause,
(EXCCODE_FPE << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
} else {
kvm_err("Trying to deliver FPE when EXL is already set\n");
......@@ -2390,7 +2405,7 @@ enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause,
(EXCCODE_MSADIS << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
} else {
kvm_err("Trying to deliver MSADIS when EXL is already set\n");
......@@ -2409,6 +2424,7 @@ enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc,
enum emulation_result er = EMULATE_DONE;
unsigned long curr_pc;
union mips_instruction inst;
int err;
/*
* Update PC and hold onto current PC in case there is
......@@ -2422,11 +2438,9 @@ enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc,
/* Fetch the instruction. */
if (cause & CAUSEF_BD)
opc += 1;
inst.word = kvm_get_inst(opc, vcpu);
if (inst.word == KVM_INVALID_INST) {
kvm_err("%s: Cannot get inst @ %p\n", __func__, opc);
err = kvm_get_badinstr(opc, vcpu, &inst.word);
if (err) {
kvm_err("%s: Cannot get inst @ %p (%d)\n", __func__, opc, err);
return EMULATE_FAIL;
}
......@@ -2557,7 +2571,7 @@ static enum emulation_result kvm_mips_emulate_exc(u32 cause,
(exccode << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180;
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
kvm_debug("Delivering EXC %d @ pc %#lx, badVaddr: %#lx\n",
......@@ -2670,7 +2684,8 @@ enum emulation_result kvm_mips_check_privilege(u32 cause,
enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
struct kvm_vcpu *vcpu,
bool write_fault)
{
enum emulation_result er = EMULATE_DONE;
u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f;
......@@ -2726,7 +2741,8 @@ enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
* OK we have a Guest TLB entry, now inject it into the
* shadow host TLB
*/
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, va,
write_fault)) {
kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
__func__, va, index, vcpu,
read_c0_entryhi());
......
......@@ -12,8 +12,11 @@
*/
#include <linux/kvm_host.h>
#include <linux/log2.h>
#include <asm/mmu_context.h>
#include <asm/msa.h>
#include <asm/setup.h>
#include <asm/tlbex.h>
#include <asm/uasm.h>
/* Register names */
......@@ -50,6 +53,8 @@
/* Some CP0 registers */
#define C0_HWRENA 7, 0
#define C0_BADVADDR 8, 0
#define C0_BADINSTR 8, 1
#define C0_BADINSTRP 8, 2
#define C0_ENTRYHI 10, 0
#define C0_STATUS 12, 0
#define C0_CAUSE 13, 0
......@@ -89,6 +94,21 @@ static void *kvm_mips_build_ret_from_exit(void *addr);
static void *kvm_mips_build_ret_to_guest(void *addr);
static void *kvm_mips_build_ret_to_host(void *addr);
/*
* The version of this function in tlbex.c uses current_cpu_type(), but for KVM
* we assume symmetry.
*/
static int c0_kscratch(void)
{
switch (boot_cpu_type()) {
case CPU_XLP:
case CPU_XLR:
return 22;
default:
return 31;
}
}
/**
* kvm_mips_entry_setup() - Perform global setup for entry code.
*
......@@ -103,18 +123,21 @@ int kvm_mips_entry_setup(void)
* We prefer to use KScratchN registers if they are available over the
* defaults above, which may not work on all cores.
*/
unsigned int kscratch_mask = cpu_data[0].kscratch_mask & 0xfc;
unsigned int kscratch_mask = cpu_data[0].kscratch_mask;
if (pgd_reg != -1)
kscratch_mask &= ~BIT(pgd_reg);
/* Pick a scratch register for storing VCPU */
if (kscratch_mask) {
scratch_vcpu[0] = 31;
scratch_vcpu[0] = c0_kscratch();
scratch_vcpu[1] = ffs(kscratch_mask) - 1;
kscratch_mask &= ~BIT(scratch_vcpu[1]);
}
/* Pick a scratch register to use as a temp for saving state */
if (kscratch_mask) {
scratch_tmp[0] = 31;
scratch_tmp[0] = c0_kscratch();
scratch_tmp[1] = ffs(kscratch_mask) - 1;
kscratch_mask &= ~BIT(scratch_tmp[1]);
}
......@@ -130,7 +153,7 @@ static void kvm_mips_build_save_scratch(u32 **p, unsigned int tmp,
UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame);
/* Save the temp scratch register value in cp0_cause of stack frame */
if (scratch_tmp[0] == 31) {
if (scratch_tmp[0] == c0_kscratch()) {
UASM_i_MFC0(p, tmp, scratch_tmp[0], scratch_tmp[1]);
UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame);
}
......@@ -146,7 +169,7 @@ static void kvm_mips_build_restore_scratch(u32 **p, unsigned int tmp,
UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame);
UASM_i_MTC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]);
if (scratch_tmp[0] == 31) {
if (scratch_tmp[0] == c0_kscratch()) {
UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame);
UASM_i_MTC0(p, tmp, scratch_tmp[0], scratch_tmp[1]);
}
......@@ -286,23 +309,26 @@ static void *kvm_mips_build_enter_guest(void *addr)
uasm_i_andi(&p, T0, T0, KSU_USER | ST0_ERL | ST0_EXL);
uasm_i_xori(&p, T0, T0, KSU_USER);
uasm_il_bnez(&p, &r, T0, label_kernel_asid);
UASM_i_ADDIU(&p, T1, K1,
offsetof(struct kvm_vcpu_arch, guest_kernel_asid));
UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch,
guest_kernel_mm.context.asid));
/* else user */
UASM_i_ADDIU(&p, T1, K1,
offsetof(struct kvm_vcpu_arch, guest_user_asid));
UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch,
guest_user_mm.context.asid));
uasm_l_kernel_asid(&l, p);
/* t1: contains the base of the ASID array, need to get the cpu id */
/* smp_processor_id */
uasm_i_lw(&p, T2, offsetof(struct thread_info, cpu), GP);
/* x4 */
uasm_i_sll(&p, T2, T2, 2);
/* index the ASID array */
uasm_i_sll(&p, T2, T2, ilog2(sizeof(long)));
UASM_i_ADDU(&p, T3, T1, T2);
uasm_i_lw(&p, K0, 0, T3);
UASM_i_LW(&p, K0, 0, T3);
#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
/* x sizeof(struct cpuinfo_mips)/4 */
uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/4);
/*
* reuse ASID array offset
* cpuinfo_mips is a multiple of sizeof(long)
*/
uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/sizeof(long));
uasm_i_mul(&p, T2, T2, T3);
UASM_i_LA_mostly(&p, AT, (long)&cpu_data[0].asid_mask);
......@@ -312,7 +338,20 @@ static void *kvm_mips_build_enter_guest(void *addr)
#else
uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID);
#endif
/*
* Set up KVM T&E GVA pgd.
* This does roughly the same as TLBMISS_HANDLER_SETUP_PGD():
* - call tlbmiss_handler_setup_pgd(mm->pgd)
* - but skips write into CP0_PWBase for now
*/
UASM_i_LW(&p, A0, (int)offsetof(struct mm_struct, pgd) -
(int)offsetof(struct mm_struct, context.asid), T1);
UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
uasm_i_jalr(&p, RA, T9);
uasm_i_mtc0(&p, K0, C0_ENTRYHI);
uasm_i_ehb(&p);
/* Disable RDHWR access */
......@@ -347,6 +386,80 @@ static void *kvm_mips_build_enter_guest(void *addr)
return p;
}
/**
* kvm_mips_build_tlb_refill_exception() - Assemble TLB refill handler.
* @addr: Address to start writing code.
* @handler: Address of common handler (within range of @addr).
*
* Assemble TLB refill exception fast path handler for guest execution.
*
* Returns: Next address after end of written function.
*/
void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler)
{
u32 *p = addr;
struct uasm_label labels[2];
struct uasm_reloc relocs[2];
struct uasm_label *l = labels;
struct uasm_reloc *r = relocs;
memset(labels, 0, sizeof(labels));
memset(relocs, 0, sizeof(relocs));
/* Save guest k1 into scratch register */
UASM_i_MTC0(&p, K1, scratch_tmp[0], scratch_tmp[1]);
/* Get the VCPU pointer from the VCPU scratch register */
UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]);
/* Save guest k0 into VCPU structure */
UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1);
/*
* Some of the common tlbex code uses current_cpu_type(). For KVM we
* assume symmetry and just disable preemption to silence the warning.
*/
preempt_disable();
/*
* Now for the actual refill bit. A lot of this can be common with the
* Linux TLB refill handler, however we don't need to handle so many
* cases. We only need to handle user mode refills, and user mode runs
* with 32-bit addressing.
*
* Therefore the branch to label_vmalloc generated by build_get_pmde64()
* that isn't resolved should never actually get taken and is harmless
* to leave in place for now.
*/
#ifdef CONFIG_64BIT
build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */
#else
build_get_pgde32(&p, K0, K1); /* get pgd in K1 */
#endif
/* we don't support huge pages yet */
build_get_ptep(&p, K0, K1);
build_update_entries(&p, K0, K1);
build_tlb_write_entry(&p, &l, &r, tlb_random);
preempt_enable();
/* Get the VCPU pointer from the VCPU scratch register again */
UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]);
/* Restore the guest's k0/k1 registers */
UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1);
uasm_i_ehb(&p);
UASM_i_MFC0(&p, K1, scratch_tmp[0], scratch_tmp[1]);
/* Jump to guest */
uasm_i_eret(&p);
return p;
}
/**
* kvm_mips_build_exception() - Assemble first level guest exception handler.
* @addr: Address to start writing code.
......@@ -468,6 +581,18 @@ void *kvm_mips_build_exit(void *addr)
uasm_i_mfc0(&p, K0, C0_CAUSE);
uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_cause), K1);
if (cpu_has_badinstr) {
uasm_i_mfc0(&p, K0, C0_BADINSTR);
uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch,
host_cp0_badinstr), K1);
}
if (cpu_has_badinstrp) {
uasm_i_mfc0(&p, K0, C0_BADINSTRP);
uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch,
host_cp0_badinstrp), K1);
}
/* Now restore the host state just enough to run the handlers */
/* Switch EBASE to the one used by Linux */
......
......@@ -183,10 +183,11 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
(exccode << CAUSEB_EXCCODE));
/* XXXSL Set PC to the interrupt exception entry point */
arch->pc = kvm_mips_guest_exception_base(vcpu);
if (kvm_read_c0_guest_cause(cop0) & CAUSEF_IV)
arch->pc = KVM_GUEST_KSEG0 + 0x200;
arch->pc += 0x200;
else
arch->pc = KVM_GUEST_KSEG0 + 0x180;
arch->pc += 0x180;
clear_bit(priority, &vcpu->arch.pending_exceptions);
}
......
......@@ -22,6 +22,7 @@
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <linux/kvm_host.h>
......@@ -63,18 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{NULL}
};
static int kvm_mips_reset_vcpu(struct kvm_vcpu *vcpu)
{
int i;
for_each_possible_cpu(i) {
vcpu->arch.guest_kernel_asid[i] = 0;
vcpu->arch.guest_user_asid[i] = 0;
}
return 0;
}
/*
* XXXKYMA: We are simulatoring a processor that has the WII bit set in
* Config7, so we are "runnable" if interrupts are pending
......@@ -104,39 +93,12 @@ void kvm_arch_check_processor_compat(void *rtn)
*(int *)rtn = 0;
}
static void kvm_mips_init_tlbs(struct kvm *kvm)
{
unsigned long wired;
/*
* Add a wired entry to the TLB, it is used to map the commpage to
* the Guest kernel
*/
wired = read_c0_wired();
write_c0_wired(wired + 1);
mtc0_tlbw_hazard();
kvm->arch.commpage_tlb = wired;
kvm_debug("[%d] commpage TLB: %d\n", smp_processor_id(),
kvm->arch.commpage_tlb);
}
static void kvm_mips_init_vm_percpu(void *arg)
{
struct kvm *kvm = (struct kvm *)arg;
kvm_mips_init_tlbs(kvm);
kvm_mips_callbacks->vm_init(kvm);
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
if (atomic_inc_return(&kvm_mips_instance) == 1) {
kvm_debug("%s: 1st KVM instance, setup host TLB parameters\n",
__func__);
on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1);
}
/* Allocate page table to map GPA -> RPA */
kvm->arch.gpa_mm.pgd = kvm_pgd_alloc();
if (!kvm->arch.gpa_mm.pgd)
return -ENOMEM;
return 0;
}
......@@ -156,13 +118,6 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
unsigned int i;
struct kvm_vcpu *vcpu;
/* Put the pages we reserved for the guest pmap */
for (i = 0; i < kvm->arch.guest_pmap_npages; i++) {
if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE)
kvm_release_pfn_clean(kvm->arch.guest_pmap[i]);
}
kfree(kvm->arch.guest_pmap);
kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_arch_vcpu_free(vcpu);
}
......@@ -177,25 +132,17 @@ void kvm_mips_free_vcpus(struct kvm *kvm)
mutex_unlock(&kvm->lock);
}
static void kvm_mips_uninit_tlbs(void *arg)
static void kvm_mips_free_gpa_pt(struct kvm *kvm)
{
/* Restore wired count */
write_c0_wired(0);
mtc0_tlbw_hazard();
/* Clear out all the TLBs */
kvm_local_flush_tlb_all();
/* It should always be safe to remove after flushing the whole range */
WARN_ON(!kvm_mips_flush_gpa_pt(kvm, 0, ~0));
pgd_free(NULL, kvm->arch.gpa_mm.pgd);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_mips_free_vcpus(kvm);
/* If this is the last instance, restore wired count */
if (atomic_dec_return(&kvm_mips_instance) == 0) {
kvm_debug("%s: last KVM instance, restoring TLB parameters\n",
__func__);
on_each_cpu(kvm_mips_uninit_tlbs, NULL, 1);
}
kvm_mips_free_gpa_pt(kvm);
}
long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl,
......@@ -210,6 +157,32 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
return 0;
}
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
/* Flush whole GPA */
kvm_mips_flush_gpa_pt(kvm, 0, ~0);
/* Let implementation do the rest */
kvm_mips_callbacks->flush_shadow_all(kvm);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
/*
* The slot has been made invalid (ready for moving or deletion), so we
* need to ensure that it can no longer be accessed by any guest VCPUs.
*/
spin_lock(&kvm->mmu_lock);
/* Flush slot from GPA */
kvm_mips_flush_gpa_pt(kvm, slot->base_gfn,
slot->base_gfn + slot->npages - 1);
/* Let implementation do the rest */
kvm_mips_callbacks->flush_shadow_memslot(kvm, slot);
spin_unlock(&kvm->mmu_lock);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
const struct kvm_userspace_memory_region *mem,
......@@ -224,35 +197,32 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
unsigned long npages = 0;
int i;
int needs_flush;
kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n",
__func__, kvm, mem->slot, mem->guest_phys_addr,
mem->memory_size, mem->userspace_addr);
/* Setup Guest PMAP table */
if (!kvm->arch.guest_pmap) {
if (mem->slot == 0)
npages = mem->memory_size >> PAGE_SHIFT;
if (npages) {
kvm->arch.guest_pmap_npages = npages;
kvm->arch.guest_pmap =
kzalloc(npages * sizeof(unsigned long), GFP_KERNEL);
if (!kvm->arch.guest_pmap) {
kvm_err("Failed to allocate guest PMAP\n");
return;
}
kvm_debug("Allocated space for Guest PMAP Table (%ld pages) @ %p\n",
npages, kvm->arch.guest_pmap);
/* Now setup the page table */
for (i = 0; i < npages; i++)
kvm->arch.guest_pmap[i] = KVM_INVALID_PAGE;
}
/*
* If dirty page logging is enabled, write protect all pages in the slot
* ready for dirty logging.
*
* There is no need to do this in any of the following cases:
* CREATE: No dirty mappings will already exist.
* MOVE/DELETE: The old mappings will already have been cleaned up by
* kvm_arch_flush_shadow_memslot()
*/
if (change == KVM_MR_FLAGS_ONLY &&
(!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
new->flags & KVM_MEM_LOG_DIRTY_PAGES)) {
spin_lock(&kvm->mmu_lock);
/* Write protect GPA page table entries */
needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn,
new->base_gfn + new->npages - 1);
/* Let implementation do the rest */
if (needs_flush)
kvm_mips_callbacks->flush_shadow_memslot(kvm, new);
spin_unlock(&kvm->mmu_lock);
}
}
......@@ -276,7 +246,7 @@ static inline void dump_handler(const char *symbol, void *start, void *end)
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
int err, size;
void *gebase, *p, *handler;
void *gebase, *p, *handler, *refill_start, *refill_end;
int i;
struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
......@@ -329,8 +299,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
/* Build guest exception vectors dynamically in unmapped memory */
handler = gebase + 0x2000;
/* TLB Refill, EXL = 0 */
kvm_mips_build_exception(gebase, handler);
/* TLB refill */
refill_start = gebase;
refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler);
/* General Exception Entry point */
kvm_mips_build_exception(gebase + 0x180, handler);
......@@ -356,6 +327,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
pr_debug("#include <asm/regdef.h>\n");
pr_debug("\n");
dump_handler("kvm_vcpu_run", vcpu->arch.vcpu_run, p);
dump_handler("kvm_tlb_refill", refill_start, refill_end);
dump_handler("kvm_gen_exc", gebase + 0x180, gebase + 0x200);
dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run);
......@@ -406,6 +378,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvm_mips_dump_stats(vcpu);
kvm_mmu_free_memory_caches(vcpu);
kfree(vcpu->arch.guest_ebase);
kfree(vcpu->arch.kseg0_commpage);
kfree(vcpu);
......@@ -422,34 +395,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
return -ENOIOCTLCMD;
}
/* Must be called with preemption disabled, just before entering guest */
static void kvm_mips_check_asids(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
int i, cpu = smp_processor_id();
unsigned int gasid;
/*
* Lazy host ASID regeneration for guest user mode.
* If the guest ASID has changed since the last guest usermode
* execution, regenerate the host ASID so as to invalidate stale TLB
* entries.
*/
if (!KVM_GUEST_KERNEL_MODE(vcpu)) {
gasid = kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID;
if (gasid != vcpu->arch.last_user_gasid) {
kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu,
vcpu);
vcpu->arch.guest_user_asid[cpu] =
vcpu->arch.guest_user_mm.context.asid[cpu];
for_each_possible_cpu(i)
if (i != cpu)
vcpu->arch.guest_user_asid[cpu] = 0;
vcpu->arch.last_user_gasid = gasid;
}
}
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int r = 0;
......@@ -467,25 +412,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
lose_fpu(1);
local_irq_disable();
/* Check if we have any exceptions/interrupts pending */
kvm_mips_deliver_interrupts(vcpu,
kvm_read_c0_guest_cause(vcpu->arch.cop0));
guest_enter_irqoff();
/* Disable hardware page table walking while in guest */
htw_stop();
trace_kvm_enter(vcpu);
kvm_mips_check_asids(vcpu);
r = vcpu->arch.vcpu_run(run, vcpu);
trace_kvm_out(vcpu);
/*
* Make sure the read of VCPU requests in vcpu_run() callback is not
* reordered ahead of the write to vcpu->mode, or we could miss a TLB
* flush request while the requester sees the VCPU as outside of guest
* mode and not needing an IPI.
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
/* Re-enable HTW before enabling interrupts */
htw_start();
r = kvm_mips_callbacks->vcpu_run(run, vcpu);
trace_kvm_out(vcpu);
guest_exit_irqoff();
local_irq_enable();
......@@ -580,33 +520,6 @@ static u64 kvm_mips_get_one_regs[] = {
KVM_REG_MIPS_LO,
#endif
KVM_REG_MIPS_PC,
KVM_REG_MIPS_CP0_INDEX,
KVM_REG_MIPS_CP0_CONTEXT,
KVM_REG_MIPS_CP0_USERLOCAL,
KVM_REG_MIPS_CP0_PAGEMASK,
KVM_REG_MIPS_CP0_WIRED,
KVM_REG_MIPS_CP0_HWRENA,
KVM_REG_MIPS_CP0_BADVADDR,
KVM_REG_MIPS_CP0_COUNT,
KVM_REG_MIPS_CP0_ENTRYHI,
KVM_REG_MIPS_CP0_COMPARE,
KVM_REG_MIPS_CP0_STATUS,
KVM_REG_MIPS_CP0_CAUSE,
KVM_REG_MIPS_CP0_EPC,
KVM_REG_MIPS_CP0_PRID,
KVM_REG_MIPS_CP0_CONFIG,
KVM_REG_MIPS_CP0_CONFIG1,
KVM_REG_MIPS_CP0_CONFIG2,
KVM_REG_MIPS_CP0_CONFIG3,
KVM_REG_MIPS_CP0_CONFIG4,
KVM_REG_MIPS_CP0_CONFIG5,
KVM_REG_MIPS_CP0_CONFIG7,
KVM_REG_MIPS_CP0_ERROREPC,
KVM_REG_MIPS_COUNT_CTL,
KVM_REG_MIPS_COUNT_RESUME,
KVM_REG_MIPS_COUNT_HZ,
};
static u64 kvm_mips_get_one_regs_fpu[] = {
......@@ -619,15 +532,6 @@ static u64 kvm_mips_get_one_regs_msa[] = {
KVM_REG_MIPS_MSA_CSR,
};
static u64 kvm_mips_get_one_regs_kscratch[] = {
KVM_REG_MIPS_CP0_KSCRATCH1,
KVM_REG_MIPS_CP0_KSCRATCH2,
KVM_REG_MIPS_CP0_KSCRATCH3,
KVM_REG_MIPS_CP0_KSCRATCH4,
KVM_REG_MIPS_CP0_KSCRATCH5,
KVM_REG_MIPS_CP0_KSCRATCH6,
};
static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu)
{
unsigned long ret;
......@@ -641,7 +545,6 @@ static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu)
}
if (kvm_mips_guest_can_have_msa(&vcpu->arch))
ret += ARRAY_SIZE(kvm_mips_get_one_regs_msa) + 32;
ret += __arch_hweight8(vcpu->arch.kscratch_enabled);
ret += kvm_mips_callbacks->num_regs(vcpu);
return ret;
......@@ -694,16 +597,6 @@ static int kvm_mips_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices)
}
}
for (i = 0; i < 6; ++i) {
if (!(vcpu->arch.kscratch_enabled & BIT(i + 2)))
continue;
if (copy_to_user(indices, &kvm_mips_get_one_regs_kscratch[i],
sizeof(kvm_mips_get_one_regs_kscratch[i])))
return -EFAULT;
++indices;
}
return kvm_mips_callbacks->copy_reg_indices(vcpu, indices);
}
......@@ -794,95 +687,6 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
v = fpu->msacsr;
break;
/* Co-processor 0 registers */
case KVM_REG_MIPS_CP0_INDEX:
v = (long)kvm_read_c0_guest_index(cop0);
break;
case KVM_REG_MIPS_CP0_CONTEXT:
v = (long)kvm_read_c0_guest_context(cop0);
break;
case KVM_REG_MIPS_CP0_USERLOCAL:
v = (long)kvm_read_c0_guest_userlocal(cop0);
break;
case KVM_REG_MIPS_CP0_PAGEMASK:
v = (long)kvm_read_c0_guest_pagemask(cop0);
break;
case KVM_REG_MIPS_CP0_WIRED:
v = (long)kvm_read_c0_guest_wired(cop0);
break;
case KVM_REG_MIPS_CP0_HWRENA:
v = (long)kvm_read_c0_guest_hwrena(cop0);
break;
case KVM_REG_MIPS_CP0_BADVADDR:
v = (long)kvm_read_c0_guest_badvaddr(cop0);
break;
case KVM_REG_MIPS_CP0_ENTRYHI:
v = (long)kvm_read_c0_guest_entryhi(cop0);
break;
case KVM_REG_MIPS_CP0_COMPARE:
v = (long)kvm_read_c0_guest_compare(cop0);
break;
case KVM_REG_MIPS_CP0_STATUS:
v = (long)kvm_read_c0_guest_status(cop0);
break;
case KVM_REG_MIPS_CP0_CAUSE:
v = (long)kvm_read_c0_guest_cause(cop0);
break;
case KVM_REG_MIPS_CP0_EPC:
v = (long)kvm_read_c0_guest_epc(cop0);
break;
case KVM_REG_MIPS_CP0_PRID:
v = (long)kvm_read_c0_guest_prid(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG:
v = (long)kvm_read_c0_guest_config(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG1:
v = (long)kvm_read_c0_guest_config1(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG2:
v = (long)kvm_read_c0_guest_config2(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG3:
v = (long)kvm_read_c0_guest_config3(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG4:
v = (long)kvm_read_c0_guest_config4(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG5:
v = (long)kvm_read_c0_guest_config5(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG7:
v = (long)kvm_read_c0_guest_config7(cop0);
break;
case KVM_REG_MIPS_CP0_ERROREPC:
v = (long)kvm_read_c0_guest_errorepc(cop0);
break;
case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6:
idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2;
if (!(vcpu->arch.kscratch_enabled & BIT(idx)))
return -EINVAL;
switch (idx) {
case 2:
v = (long)kvm_read_c0_guest_kscratch1(cop0);
break;
case 3:
v = (long)kvm_read_c0_guest_kscratch2(cop0);
break;
case 4:
v = (long)kvm_read_c0_guest_kscratch3(cop0);
break;
case 5:
v = (long)kvm_read_c0_guest_kscratch4(cop0);
break;
case 6:
v = (long)kvm_read_c0_guest_kscratch5(cop0);
break;
case 7:
v = (long)kvm_read_c0_guest_kscratch6(cop0);
break;
}
break;
/* registers to be handled specially */
default:
ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v);
......@@ -1014,68 +818,6 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
fpu->msacsr = v;
break;
/* Co-processor 0 registers */
case KVM_REG_MIPS_CP0_INDEX:
kvm_write_c0_guest_index(cop0, v);
break;
case KVM_REG_MIPS_CP0_CONTEXT:
kvm_write_c0_guest_context(cop0, v);
break;
case KVM_REG_MIPS_CP0_USERLOCAL:
kvm_write_c0_guest_userlocal(cop0, v);
break;
case KVM_REG_MIPS_CP0_PAGEMASK:
kvm_write_c0_guest_pagemask(cop0, v);
break;
case KVM_REG_MIPS_CP0_WIRED:
kvm_write_c0_guest_wired(cop0, v);
break;
case KVM_REG_MIPS_CP0_HWRENA:
kvm_write_c0_guest_hwrena(cop0, v);
break;
case KVM_REG_MIPS_CP0_BADVADDR:
kvm_write_c0_guest_badvaddr(cop0, v);
break;
case KVM_REG_MIPS_CP0_ENTRYHI:
kvm_write_c0_guest_entryhi(cop0, v);
break;
case KVM_REG_MIPS_CP0_STATUS:
kvm_write_c0_guest_status(cop0, v);
break;
case KVM_REG_MIPS_CP0_EPC:
kvm_write_c0_guest_epc(cop0, v);
break;
case KVM_REG_MIPS_CP0_PRID:
kvm_write_c0_guest_prid(cop0, v);
break;
case KVM_REG_MIPS_CP0_ERROREPC:
kvm_write_c0_guest_errorepc(cop0, v);
break;
case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6:
idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2;
if (!(vcpu->arch.kscratch_enabled & BIT(idx)))
return -EINVAL;
switch (idx) {
case 2:
kvm_write_c0_guest_kscratch1(cop0, v);
break;
case 3:
kvm_write_c0_guest_kscratch2(cop0, v);
break;
case 4:
kvm_write_c0_guest_kscratch3(cop0, v);
break;
case 5:
kvm_write_c0_guest_kscratch4(cop0, v);
break;
case 6:
kvm_write_c0_guest_kscratch5(cop0, v);
break;
case 7:
kvm_write_c0_guest_kscratch6(cop0, v);
break;
}
break;
/* registers to be handled specially */
default:
return kvm_mips_callbacks->set_one_reg(vcpu, reg, v);
......@@ -1144,18 +886,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
return -E2BIG;
return kvm_mips_copy_reg_indices(vcpu, user_list->reg);
}
case KVM_NMI:
/* Treat the NMI as a CPU reset */
r = kvm_mips_reset_vcpu(vcpu);
break;
case KVM_INTERRUPT:
{
struct kvm_mips_interrupt irq;
r = -EFAULT;
if (copy_from_user(&irq, argp, sizeof(irq)))
goto out;
return -EFAULT;
kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__,
irq.irq);
......@@ -1165,56 +901,57 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
case KVM_ENABLE_CAP: {
struct kvm_enable_cap cap;
r = -EFAULT;
if (copy_from_user(&cap, argp, sizeof(cap)))
goto out;
return -EFAULT;
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
default:
r = -ENOIOCTLCMD;
}
out:
return r;
}
/* Get (and clear) the dirty memory log for a memory slot. */
/**
* kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
* @kvm: kvm instance
* @log: slot id and address to which we copy the log
*
* Steps 1-4 below provide general overview of dirty page logging. See
* kvm_get_dirty_log_protect() function description for additional details.
*
* We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
* always flush the TLB (step 4) even if previous step failed and the dirty
* bitmap may be corrupt. Regardless of previous outcome the KVM logging API
* does not preclude user space subsequent dirty log read. Flushing TLB ensures
* writes will be marked dirty for next log read.
*
* 1. Take a snapshot of the bit and clear it if needed.
* 2. Write protect the corresponding page.
* 3. Copy the snapshot to the userspace.
* 4. Flush TLB's if needed.
*/
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
unsigned long ga, ga_end;
int is_dirty = 0;
bool is_dirty = false;
int r;
unsigned long n;
mutex_lock(&kvm->slots_lock);
r = kvm_get_dirty_log(kvm, log, &is_dirty);
if (r)
goto out;
r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
slots = kvm_memslots(kvm);
memslot = id_to_memslot(slots, log->slot);
ga = memslot->base_gfn << PAGE_SHIFT;
ga_end = ga + (memslot->npages << PAGE_SHIFT);
kvm_info("%s: dirty, ga: %#lx, ga_end %#lx\n", __func__, ga,
ga_end);
n = kvm_dirty_bitmap_bytes(memslot);
memset(memslot->dirty_bitmap, 0, n);
/* Let implementation handle TLB/GVA invalidation */
kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot);
}
r = 0;
out:
mutex_unlock(&kvm->slots_lock);
return r;
}
long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
......@@ -1282,11 +1019,19 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
switch (ext) {
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_READONLY_MEM:
case KVM_CAP_SYNC_MMU:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
case KVM_CAP_NR_VCPUS:
r = num_online_cpus();
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
case KVM_CAP_MIPS_FPU:
/* We don't handle systems with inconsistent cpu_has_fpu */
r = !!raw_cpu_has_fpu;
......@@ -1400,13 +1145,23 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
kvm_mips_callbacks->vcpu_init(vcpu);
int err;
err = kvm_mips_callbacks->vcpu_init(vcpu);
if (err)
return err;
hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup;
return 0;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
kvm_mips_callbacks->vcpu_uninit(vcpu);
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
......@@ -1440,8 +1195,11 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
enum emulation_result er = EMULATE_DONE;
u32 inst;
int ret = RESUME_GUEST;
vcpu->mode = OUTSIDE_GUEST_MODE;
/* re-enable HTW before enabling interrupts */
htw_start();
......@@ -1564,8 +1322,12 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
default:
if (cause & CAUSEF_BD)
opc += 1;
inst = 0;
kvm_get_badinstr(opc, vcpu, &inst);
kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n",
exccode, opc, kvm_get_inst(opc, vcpu), badvaddr,
exccode, opc, inst, badvaddr,
kvm_read_c0_guest_status(vcpu->arch.cop0));
kvm_arch_vcpu_dump_regs(vcpu);
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
......@@ -1593,7 +1355,15 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
if (ret == RESUME_GUEST) {
trace_kvm_reenter(vcpu);
kvm_mips_check_asids(vcpu);
/*
* Make sure the read of VCPU requests in vcpu_reenter()
* callback is not reordered ahead of the write to vcpu->mode,
* or we could miss a TLB flush request while the requester sees
* the VCPU as outside of guest mode and not needing an IPI.
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
kvm_mips_callbacks->vcpu_reenter(run, vcpu);
/*
* If FPU / MSA are enabled (i.e. the guest's FPU / MSA context
......
......@@ -11,86 +11,995 @@
#include <linux/highmem.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
/*
* KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels
* for which pages need to be cached.
*/
#if defined(__PAGETABLE_PMD_FOLDED)
#define KVM_MMU_CACHE_MIN_PAGES 1
#else
#define KVM_MMU_CACHE_MIN_PAGES 2
#endif
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
int min, int max)
{
int cpu = smp_processor_id();
void *page;
return vcpu->arch.guest_kernel_asid[cpu] &
cpu_asid_mask(&cpu_data[cpu]);
BUG_ON(max > KVM_NR_MEM_OBJS);
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < max) {
page = (void *)__get_free_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
cache->objects[cache->nobjs++] = page;
}
return 0;
}
static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu)
static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
int cpu = smp_processor_id();
while (mc->nobjs)
free_page((unsigned long)mc->objects[--mc->nobjs]);
}
return vcpu->arch.guest_user_asid[cpu] &
cpu_asid_mask(&cpu_data[cpu]);
static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
void *p;
BUG_ON(!mc || !mc->nobjs);
p = mc->objects[--mc->nobjs];
return p;
}
static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
int srcu_idx, err = 0;
kvm_pfn_t pfn;
mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
/**
* kvm_pgd_init() - Initialise KVM GPA page directory.
* @page: Pointer to page directory (PGD) for KVM GPA.
*
* Initialise a KVM GPA page directory with pointers to the invalid table, i.e.
* representing no mappings. This is similar to pgd_init(), however it
* initialises all the page directory pointers, not just the ones corresponding
* to the userland address space (since it is for the guest physical address
* space rather than a virtual address space).
*/
static void kvm_pgd_init(void *page)
{
unsigned long *p, *end;
unsigned long entry;
#ifdef __PAGETABLE_PMD_FOLDED
entry = (unsigned long)invalid_pte_table;
#else
entry = (unsigned long)invalid_pmd_table;
#endif
p = (unsigned long *)page;
end = p + PTRS_PER_PGD;
do {
p[0] = entry;
p[1] = entry;
p[2] = entry;
p[3] = entry;
p[4] = entry;
p += 8;
p[-3] = entry;
p[-2] = entry;
p[-1] = entry;
} while (p != end);
}
/**
* kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
*
* Allocate a blank KVM GPA page directory (PGD) for representing guest physical
* to host physical page mappings.
*
* Returns: Pointer to new KVM GPA page directory.
* NULL on allocation failure.
*/
pgd_t *kvm_pgd_alloc(void)
{
pgd_t *ret;
ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_ORDER);
if (ret)
kvm_pgd_init(ret);
return ret;
}
/**
* kvm_mips_walk_pgd() - Walk page table with optional allocation.
* @pgd: Page directory pointer.
* @addr: Address to index page table using.
* @cache: MMU page cache to allocate new page tables from, or NULL.
*
* Walk the page tables pointed to by @pgd to find the PTE corresponding to the
* address @addr. If page tables don't exist for @addr, they will be created
* from the MMU cache if @cache is not NULL.
*
* Returns: Pointer to pte_t corresponding to @addr.
* NULL if a page table doesn't exist for @addr and !@cache.
* NULL if a page table allocation failed.
*/
static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
unsigned long addr)
{
pud_t *pud;
pmd_t *pmd;
pgd += pgd_index(addr);
if (pgd_none(*pgd)) {
/* Not used on MIPS yet */
BUG();
return NULL;
}
pud = pud_offset(pgd, addr);
if (pud_none(*pud)) {
pmd_t *new_pmd;
if (!cache)
return NULL;
new_pmd = mmu_memory_cache_alloc(cache);
pmd_init((unsigned long)new_pmd,
(unsigned long)invalid_pte_table);
pud_populate(NULL, pud, new_pmd);
}
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd)) {
pte_t *new_pte;
if (!cache)
return NULL;
new_pte = mmu_memory_cache_alloc(cache);
clear_page(new_pte);
pmd_populate_kernel(NULL, pmd, new_pte);
}
return pte_offset(pmd, addr);
}
/* Caller must hold kvm->mm_lock */
static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm,
struct kvm_mmu_memory_cache *cache,
unsigned long addr)
{
return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr);
}
/*
* kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}.
* Flush a range of guest physical address space from the VM's GPA page tables.
*/
static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa,
unsigned long end_gpa)
{
int i_min = __pte_offset(start_gpa);
int i_max = __pte_offset(end_gpa);
bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
int i;
for (i = i_min; i <= i_max; ++i) {
if (!pte_present(pte[i]))
continue;
set_pte(pte + i, __pte(0));
}
return safe_to_remove;
}
static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
unsigned long end_gpa)
{
pte_t *pte;
unsigned long end = ~0ul;
int i_min = __pmd_offset(start_gpa);
int i_max = __pmd_offset(end_gpa);
bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
int i;
for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
if (!pmd_present(pmd[i]))
continue;
pte = pte_offset(pmd + i, 0);
if (i == i_max)
end = end_gpa;
if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) {
pmd_clear(pmd + i);
pte_free_kernel(NULL, pte);
} else {
safe_to_remove = false;
}
}
return safe_to_remove;
}
static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
unsigned long end_gpa)
{
pmd_t *pmd;
unsigned long end = ~0ul;
int i_min = __pud_offset(start_gpa);
int i_max = __pud_offset(end_gpa);
bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
int i;
for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
if (!pud_present(pud[i]))
continue;
pmd = pmd_offset(pud + i, 0);
if (i == i_max)
end = end_gpa;
if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) {
pud_clear(pud + i);
pmd_free(NULL, pmd);
} else {
safe_to_remove = false;
}
}
return safe_to_remove;
}
static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
unsigned long end_gpa)
{
pud_t *pud;
unsigned long end = ~0ul;
int i_min = pgd_index(start_gpa);
int i_max = pgd_index(end_gpa);
bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
int i;
for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
if (!pgd_present(pgd[i]))
continue;
pud = pud_offset(pgd + i, 0);
if (i == i_max)
end = end_gpa;
if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) {
pgd_clear(pgd + i);
pud_free(NULL, pud);
} else {
safe_to_remove = false;
}
}
return safe_to_remove;
}
/**
* kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses.
* @kvm: KVM pointer.
* @start_gfn: Guest frame number of first page in GPA range to flush.
* @end_gfn: Guest frame number of last page in GPA range to flush.
*
* Flushes a range of GPA mappings from the GPA page tables.
*
* The caller must hold the @kvm->mmu_lock spinlock.
*
* Returns: Whether its safe to remove the top level page directory because
* all lower levels have been removed.
*/
bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd,
start_gfn << PAGE_SHIFT,
end_gfn << PAGE_SHIFT);
}
#define BUILD_PTE_RANGE_OP(name, op) \
static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start, \
unsigned long end) \
{ \
int ret = 0; \
int i_min = __pte_offset(start); \
int i_max = __pte_offset(end); \
int i; \
pte_t old, new; \
\
for (i = i_min; i <= i_max; ++i) { \
if (!pte_present(pte[i])) \
continue; \
\
old = pte[i]; \
new = op(old); \
if (pte_val(new) == pte_val(old)) \
continue; \
set_pte(pte + i, new); \
ret = 1; \
} \
return ret; \
} \
\
/* returns true if anything was done */ \
static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start, \
unsigned long end) \
{ \
int ret = 0; \
pte_t *pte; \
unsigned long cur_end = ~0ul; \
int i_min = __pmd_offset(start); \
int i_max = __pmd_offset(end); \
int i; \
\
for (i = i_min; i <= i_max; ++i, start = 0) { \
if (!pmd_present(pmd[i])) \
continue; \
\
pte = pte_offset(pmd + i, 0); \
if (i == i_max) \
cur_end = end; \
\
ret |= kvm_mips_##name##_pte(pte, start, cur_end); \
} \
return ret; \
} \
\
static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start, \
unsigned long end) \
{ \
int ret = 0; \
pmd_t *pmd; \
unsigned long cur_end = ~0ul; \
int i_min = __pud_offset(start); \
int i_max = __pud_offset(end); \
int i; \
\
for (i = i_min; i <= i_max; ++i, start = 0) { \
if (!pud_present(pud[i])) \
continue; \
\
pmd = pmd_offset(pud + i, 0); \
if (i == i_max) \
cur_end = end; \
\
ret |= kvm_mips_##name##_pmd(pmd, start, cur_end); \
} \
return ret; \
} \
\
static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \
unsigned long end) \
{ \
int ret = 0; \
pud_t *pud; \
unsigned long cur_end = ~0ul; \
int i_min = pgd_index(start); \
int i_max = pgd_index(end); \
int i; \
\
for (i = i_min; i <= i_max; ++i, start = 0) { \
if (!pgd_present(pgd[i])) \
continue; \
\
pud = pud_offset(pgd + i, 0); \
if (i == i_max) \
cur_end = end; \
\
ret |= kvm_mips_##name##_pud(pud, start, cur_end); \
} \
return ret; \
}
/*
* kvm_mips_mkclean_gpa_pt.
* Mark a range of guest physical address space clean (writes fault) in the VM's
* GPA page table to allow dirty page tracking.
*/
BUILD_PTE_RANGE_OP(mkclean, pte_mkclean)
/**
* kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
* @kvm: KVM pointer.
* @start_gfn: Guest frame number of first page in GPA range to flush.
* @end_gfn: Guest frame number of last page in GPA range to flush.
*
* Make a range of GPA mappings clean so that guest writes will fault and
* trigger dirty page logging.
*
* The caller must hold the @kvm->mmu_lock spinlock.
*
* Returns: Whether any GPA mappings were modified, which would require
* derived mappings (GVA page tables & TLB enties) to be
* invalidated.
*/
int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd,
start_gfn << PAGE_SHIFT,
end_gfn << PAGE_SHIFT);
}
/**
* kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
* @kvm: The KVM pointer
* @slot: The memory slot associated with mask
* @gfn_offset: The gfn offset in memory slot
* @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
* slot to be write protected
*
* Walks bits set in mask write protects the associated pte's. Caller must
* acquire @kvm->mmu_lock.
*/
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask)
{
gfn_t base_gfn = slot->base_gfn + gfn_offset;
gfn_t start = base_gfn + __ffs(mask);
gfn_t end = base_gfn + __fls(mask);
kvm_mips_mkclean_gpa_pt(kvm, start, end);
}
/*
* kvm_mips_mkold_gpa_pt.
* Mark a range of guest physical address space old (all accesses fault) in the
* VM's GPA page table to allow detection of commonly used pages.
*/
BUILD_PTE_RANGE_OP(mkold, pte_mkold)
static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
gfn_t end_gfn)
{
return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd,
start_gfn << PAGE_SHIFT,
end_gfn << PAGE_SHIFT);
}
static int handle_hva_to_gpa(struct kvm *kvm,
unsigned long start,
unsigned long end,
int (*handler)(struct kvm *kvm, gfn_t gfn,
gpa_t gfn_end,
struct kvm_memory_slot *memslot,
void *data),
void *data)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int ret = 0;
if (kvm->arch.guest_pmap[gfn] != KVM_INVALID_PAGE)
slots = kvm_memslots(kvm);
/* we only care about the pages that the guest sees */
kvm_for_each_memslot(memslot, slots) {
unsigned long hva_start, hva_end;
gfn_t gfn, gfn_end;
hva_start = max(start, memslot->userspace_addr);
hva_end = min(end, memslot->userspace_addr +
(memslot->npages << PAGE_SHIFT));
if (hva_start >= hva_end)
continue;
/*
* {gfn(page) | page intersects with [hva_start, hva_end)} =
* {gfn_start, gfn_start+1, ..., gfn_end-1}.
*/
gfn = hva_to_gfn_memslot(hva_start, memslot);
gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
ret |= handler(kvm, gfn, gfn_end, memslot, data);
}
return ret;
}
static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
struct kvm_memory_slot *memslot, void *data)
{
kvm_mips_flush_gpa_pt(kvm, gfn, gfn_end);
return 1;
}
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
unsigned long end = hva + PAGE_SIZE;
handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
kvm_mips_callbacks->flush_shadow_all(kvm);
return 0;
}
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
kvm_mips_callbacks->flush_shadow_all(kvm);
return 0;
}
static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
struct kvm_memory_slot *memslot, void *data)
{
gpa_t gpa = gfn << PAGE_SHIFT;
pte_t hva_pte = *(pte_t *)data;
pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
pte_t old_pte;
if (!gpa_pte)
return 0;
/* Mapping may need adjusting depending on memslot flags */
old_pte = *gpa_pte;
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
hva_pte = pte_mkclean(hva_pte);
else if (memslot->flags & KVM_MEM_READONLY)
hva_pte = pte_wrprotect(hva_pte);
set_pte(gpa_pte, hva_pte);
/* Replacing an absent or old page doesn't need flushes */
if (!pte_present(old_pte) || !pte_young(old_pte))
return 0;
/* Pages swapped, aged, moved, or cleaned require flushes */
return !pte_present(hva_pte) ||
!pte_young(hva_pte) ||
pte_pfn(old_pte) != pte_pfn(hva_pte) ||
(pte_dirty(old_pte) && !pte_dirty(hva_pte));
}
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
unsigned long end = hva + PAGE_SIZE;
int ret;
ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte);
if (ret)
kvm_mips_callbacks->flush_shadow_all(kvm);
}
static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
struct kvm_memory_slot *memslot, void *data)
{
return kvm_mips_mkold_gpa_pt(kvm, gfn, gfn_end);
}
static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
struct kvm_memory_slot *memslot, void *data)
{
gpa_t gpa = gfn << PAGE_SHIFT;
pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
if (!gpa_pte)
return 0;
return pte_young(*gpa_pte);
}
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
}
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
}
/**
* _kvm_mips_map_page_fast() - Fast path GPA fault handler.
* @vcpu: VCPU pointer.
* @gpa: Guest physical address of fault.
* @write_fault: Whether the fault was due to a write.
* @out_entry: New PTE for @gpa (written on success unless NULL).
* @out_buddy: New PTE for @gpa's buddy (written on success unless
* NULL).
*
* Perform fast path GPA fault handling, doing all that can be done without
* calling into KVM. This handles marking old pages young (for idle page
* tracking), and dirtying of clean pages (for dirty page logging).
*
* Returns: 0 on success, in which case we can update derived mappings and
* resume guest execution.
* -EFAULT on failure due to absent GPA mapping or write to
* read-only page, in which case KVM must be consulted.
*/
static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa,
bool write_fault,
pte_t *out_entry, pte_t *out_buddy)
{
struct kvm *kvm = vcpu->kvm;
gfn_t gfn = gpa >> PAGE_SHIFT;
pte_t *ptep;
kvm_pfn_t pfn = 0; /* silence bogus GCC warning */
bool pfn_valid = false;
int ret = 0;
spin_lock(&kvm->mmu_lock);
/* Fast path - just check GPA page table for an existing entry */
ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
if (!ptep || !pte_present(*ptep)) {
ret = -EFAULT;
goto out;
}
/* Track access to pages marked old */
if (!pte_young(*ptep)) {
set_pte(ptep, pte_mkyoung(*ptep));
pfn = pte_pfn(*ptep);
pfn_valid = true;
/* call kvm_set_pfn_accessed() after unlock */
}
if (write_fault && !pte_dirty(*ptep)) {
if (!pte_write(*ptep)) {
ret = -EFAULT;
goto out;
}
/* Track dirtying of writeable pages */
set_pte(ptep, pte_mkdirty(*ptep));
pfn = pte_pfn(*ptep);
mark_page_dirty(kvm, gfn);
kvm_set_pfn_dirty(pfn);
}
if (out_entry)
*out_entry = *ptep;
if (out_buddy)
*out_buddy = *ptep_buddy(ptep);
out:
spin_unlock(&kvm->mmu_lock);
if (pfn_valid)
kvm_set_pfn_accessed(pfn);
return ret;
}
/**
* kvm_mips_map_page() - Map a guest physical page.
* @vcpu: VCPU pointer.
* @gpa: Guest physical address of fault.
* @write_fault: Whether the fault was due to a write.
* @out_entry: New PTE for @gpa (written on success unless NULL).
* @out_buddy: New PTE for @gpa's buddy (written on success unless
* NULL).
*
* Handle GPA faults by creating a new GPA mapping (or updating an existing
* one).
*
* This takes care of marking pages young or dirty (idle/dirty page tracking),
* asking KVM for the corresponding PFN, and creating a mapping in the GPA page
* tables. Derived mappings (GVA page tables and TLBs) must be handled by the
* caller.
*
* Returns: 0 on success, in which case the caller may use the @out_entry
* and @out_buddy PTEs to update derived mappings and resume guest
* execution.
* -EFAULT if there is no memory region at @gpa or a write was
* attempted to a read-only memory region. This is usually handled
* as an MMIO access.
*/
static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
bool write_fault,
pte_t *out_entry, pte_t *out_buddy)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
gfn_t gfn = gpa >> PAGE_SHIFT;
int srcu_idx, err;
kvm_pfn_t pfn;
pte_t *ptep, entry, old_pte;
bool writeable;
unsigned long prot_bits;
unsigned long mmu_seq;
/* Try the fast path to handle old / clean pages */
srcu_idx = srcu_read_lock(&kvm->srcu);
pfn = gfn_to_pfn(kvm, gfn);
err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry,
out_buddy);
if (!err)
goto out;
/* We need a minimum of cached pages ready for page table creation */
err = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
KVM_NR_MEM_OBJS);
if (err)
goto out;
retry:
/*
* Used to check for invalidations in progress, of the pfn that is
* returned by pfn_to_pfn_prot below.
*/
mmu_seq = kvm->mmu_notifier_seq;
/*
* Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in
* gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
* risk the page we get a reference to getting unmapped before we have a
* chance to grab the mmu_lock without mmu_notifier_retry() noticing.
*
* This smp_rmb() pairs with the effective smp_wmb() of the combination
* of the pte_unmap_unlock() after the PTE is zapped, and the
* spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
* mmu_notifier_seq is incremented.
*/
smp_rmb();
/* Slow path - ask KVM core whether we can access this GPA */
pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable);
if (is_error_noslot_pfn(pfn)) {
kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn);
err = -EFAULT;
goto out;
}
kvm->arch.guest_pmap[gfn] = pfn;
spin_lock(&kvm->mmu_lock);
/* Check if an invalidation has taken place since we got pfn */
if (mmu_notifier_retry(kvm, mmu_seq)) {
/*
* This can happen when mappings are changed asynchronously, but
* also synchronously if a COW is triggered by
* gfn_to_pfn_prot().
*/
spin_unlock(&kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
goto retry;
}
/* Ensure page tables are allocated */
ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa);
/* Set up the PTE */
prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default;
if (writeable) {
prot_bits |= _PAGE_WRITE;
if (write_fault) {
prot_bits |= __WRITEABLE;
mark_page_dirty(kvm, gfn);
kvm_set_pfn_dirty(pfn);
}
}
entry = pfn_pte(pfn, __pgprot(prot_bits));
/* Write the PTE */
old_pte = *ptep;
set_pte(ptep, entry);
err = 0;
if (out_entry)
*out_entry = *ptep;
if (out_buddy)
*out_buddy = *ptep_buddy(ptep);
spin_unlock(&kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
kvm_set_pfn_accessed(pfn);
out:
srcu_read_unlock(&kvm->srcu, srcu_idx);
return err;
}
/* Translate guest KSEG0 addresses to Host PA */
unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu,
unsigned long gva)
static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu *vcpu,
unsigned long addr)
{
gfn_t gfn;
unsigned long offset = gva & ~PAGE_MASK;
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
pgd_t *pgdp;
int ret;
if (KVM_GUEST_KSEGX(gva) != KVM_GUEST_KSEG0) {
kvm_err("%s/%p: Invalid gva: %#lx\n", __func__,
__builtin_return_address(0), gva);
return KVM_INVALID_PAGE;
/* We need a minimum of cached pages ready for page table creation */
ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
KVM_NR_MEM_OBJS);
if (ret)
return NULL;
if (KVM_GUEST_KERNEL_MODE(vcpu))
pgdp = vcpu->arch.guest_kernel_mm.pgd;
else
pgdp = vcpu->arch.guest_user_mm.pgd;
return kvm_mips_walk_pgd(pgdp, memcache, addr);
}
void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr,
bool user)
{
pgd_t *pgdp;
pte_t *ptep;
addr &= PAGE_MASK << 1;
pgdp = vcpu->arch.guest_kernel_mm.pgd;
ptep = kvm_mips_walk_pgd(pgdp, NULL, addr);
if (ptep) {
ptep[0] = pfn_pte(0, __pgprot(0));
ptep[1] = pfn_pte(0, __pgprot(0));
}
if (user) {
pgdp = vcpu->arch.guest_user_mm.pgd;
ptep = kvm_mips_walk_pgd(pgdp, NULL, addr);
if (ptep) {
ptep[0] = pfn_pte(0, __pgprot(0));
ptep[1] = pfn_pte(0, __pgprot(0));
}
}
}
/*
* kvm_mips_flush_gva_{pte,pmd,pud,pgd,pt}.
* Flush a range of guest physical address space from the VM's GPA page tables.
*/
static bool kvm_mips_flush_gva_pte(pte_t *pte, unsigned long start_gva,
unsigned long end_gva)
{
int i_min = __pte_offset(start_gva);
int i_max = __pte_offset(end_gva);
bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
int i;
/*
* There's no freeing to do, so there's no point clearing individual
* entries unless only part of the last level page table needs flushing.
*/
if (safe_to_remove)
return true;
for (i = i_min; i <= i_max; ++i) {
if (!pte_present(pte[i]))
continue;
set_pte(pte + i, __pte(0));
}
return false;
}
static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva,
unsigned long end_gva)
{
pte_t *pte;
unsigned long end = ~0ul;
int i_min = __pmd_offset(start_gva);
int i_max = __pmd_offset(end_gva);
bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
int i;
for (i = i_min; i <= i_max; ++i, start_gva = 0) {
if (!pmd_present(pmd[i]))
continue;
pte = pte_offset(pmd + i, 0);
if (i == i_max)
end = end_gva;
if (kvm_mips_flush_gva_pte(pte, start_gva, end)) {
pmd_clear(pmd + i);
pte_free_kernel(NULL, pte);
} else {
safe_to_remove = false;
}
}
return safe_to_remove;
}
static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva,
unsigned long end_gva)
{
pmd_t *pmd;
unsigned long end = ~0ul;
int i_min = __pud_offset(start_gva);
int i_max = __pud_offset(end_gva);
bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
int i;
for (i = i_min; i <= i_max; ++i, start_gva = 0) {
if (!pud_present(pud[i]))
continue;
pmd = pmd_offset(pud + i, 0);
if (i == i_max)
end = end_gva;
if (kvm_mips_flush_gva_pmd(pmd, start_gva, end)) {
pud_clear(pud + i);
pmd_free(NULL, pmd);
} else {
safe_to_remove = false;
}
}
return safe_to_remove;
}
static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva,
unsigned long end_gva)
{
pud_t *pud;
unsigned long end = ~0ul;
int i_min = pgd_index(start_gva);
int i_max = pgd_index(end_gva);
bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
int i;
for (i = i_min; i <= i_max; ++i, start_gva = 0) {
if (!pgd_present(pgd[i]))
continue;
pud = pud_offset(pgd + i, 0);
if (i == i_max)
end = end_gva;
if (kvm_mips_flush_gva_pud(pud, start_gva, end)) {
pgd_clear(pgd + i);
pud_free(NULL, pud);
} else {
safe_to_remove = false;
}
}
return safe_to_remove;
}
gfn = (KVM_GUEST_CPHYSADDR(gva) >> PAGE_SHIFT);
void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags)
{
if (flags & KMF_GPA) {
/* all of guest virtual address space could be affected */
if (flags & KMF_KERN)
/* useg, kseg0, seg2/3 */
kvm_mips_flush_gva_pgd(pgd, 0, 0x7fffffff);
else
/* useg */
kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff);
} else {
/* useg */
kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff);
if (gfn >= kvm->arch.guest_pmap_npages) {
kvm_err("%s: Invalid gfn: %#llx, GVA: %#lx\n", __func__, gfn,
gva);
return KVM_INVALID_PAGE;
/* kseg2/3 */
if (flags & KMF_KERN)
kvm_mips_flush_gva_pgd(pgd, 0x60000000, 0x7fffffff);
}
}
if (kvm_mips_map_page(vcpu->kvm, gfn) < 0)
return KVM_INVALID_ADDR;
static pte_t kvm_mips_gpa_pte_to_gva_unmapped(pte_t pte)
{
/*
* Don't leak writeable but clean entries from GPA page tables. We don't
* want the normal Linux tlbmod handler to handle dirtying when KVM
* accesses guest memory.
*/
if (!pte_dirty(pte))
pte = pte_wrprotect(pte);
return pte;
}
static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo)
{
/* Guest EntryLo overrides host EntryLo */
if (!(entrylo & ENTRYLO_D))
pte = pte_mkclean(pte);
return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset;
return kvm_mips_gpa_pte_to_gva_unmapped(pte);
}
/* XXXKYMA: Must be called with interrupts disabled */
int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
struct kvm_vcpu *vcpu)
struct kvm_vcpu *vcpu,
bool write_fault)
{
gfn_t gfn;
kvm_pfn_t pfn0, pfn1;
unsigned long vaddr = 0;
unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
struct kvm *kvm = vcpu->kvm;
const int flush_dcache_mask = 0;
int ret;
unsigned long gpa;
pte_t pte_gpa[2], *ptep_gva;
int idx;
if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) {
kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr);
......@@ -98,49 +1007,39 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
return -1;
}
gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT);
if ((gfn | 1) >= kvm->arch.guest_pmap_npages) {
kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__,
gfn, badvaddr);
kvm_mips_dump_host_tlbs();
return -1;
}
vaddr = badvaddr & (PAGE_MASK << 1);
if (kvm_mips_map_page(vcpu->kvm, gfn) < 0)
/* Get the GPA page table entry */
gpa = KVM_GUEST_CPHYSADDR(badvaddr);
idx = (badvaddr >> PAGE_SHIFT) & 1;
if (kvm_mips_map_page(vcpu, gpa, write_fault, &pte_gpa[idx],
&pte_gpa[!idx]) < 0)
return -1;
if (kvm_mips_map_page(vcpu->kvm, gfn ^ 0x1) < 0)
/* Get the GVA page table entry */
ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, badvaddr & ~PAGE_SIZE);
if (!ptep_gva) {
kvm_err("No ptep for gva %lx\n", badvaddr);
return -1;
}
pfn0 = kvm->arch.guest_pmap[gfn & ~0x1];
pfn1 = kvm->arch.guest_pmap[gfn | 0x1];
entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) |
((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
ENTRYLO_D | ENTRYLO_V;
entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) |
((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
ENTRYLO_D | ENTRYLO_V;
preempt_disable();
entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu));
ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
flush_dcache_mask);
preempt_enable();
/* Copy a pair of entries from GPA page table to GVA page table */
ptep_gva[0] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[0]);
ptep_gva[1] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[1]);
return ret;
/* Invalidate this entry in the TLB, guest kernel ASID only */
kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true);
return 0;
}
int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
struct kvm_mips_tlb *tlb)
struct kvm_mips_tlb *tlb,
unsigned long gva,
bool write_fault)
{
unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
struct kvm *kvm = vcpu->kvm;
kvm_pfn_t pfn0, pfn1;
gfn_t gfn0, gfn1;
long tlb_lo[2];
int ret;
pte_t pte_gpa[2], *ptep_buddy, *ptep_gva;
unsigned int idx = TLB_LO_IDX(*tlb, gva);
bool kernel = KVM_GUEST_KERNEL_MODE(vcpu);
tlb_lo[0] = tlb->tlb_lo[0];
tlb_lo[1] = tlb->tlb_lo[1];
......@@ -149,70 +1048,64 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
* The commpage address must not be mapped to anything else if the guest
* TLB contains entries nearby, or commpage accesses will break.
*/
if (!((tlb->tlb_hi ^ KVM_GUEST_COMMPAGE_ADDR) &
VPN2_MASK & (PAGE_MASK << 1)))
tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0;
gfn0 = mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT;
gfn1 = mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT;
if (gfn0 >= kvm->arch.guest_pmap_npages ||
gfn1 >= kvm->arch.guest_pmap_npages) {
kvm_err("%s: Invalid gfn: [%#llx, %#llx], EHi: %#lx\n",
__func__, gfn0, gfn1, tlb->tlb_hi);
kvm_mips_dump_guest_tlbs(vcpu);
return -1;
}
if (!((gva ^ KVM_GUEST_COMMPAGE_ADDR) & VPN2_MASK & (PAGE_MASK << 1)))
tlb_lo[TLB_LO_IDX(*tlb, KVM_GUEST_COMMPAGE_ADDR)] = 0;
if (kvm_mips_map_page(kvm, gfn0) < 0)
/* Get the GPA page table entry */
if (kvm_mips_map_page(vcpu, mips3_tlbpfn_to_paddr(tlb_lo[idx]),
write_fault, &pte_gpa[idx], NULL) < 0)
return -1;
if (kvm_mips_map_page(kvm, gfn1) < 0)
/* And its GVA buddy's GPA page table entry if it also exists */
pte_gpa[!idx] = pfn_pte(0, __pgprot(0));
if (tlb_lo[!idx] & ENTRYLO_V) {
spin_lock(&kvm->mmu_lock);
ptep_buddy = kvm_mips_pte_for_gpa(kvm, NULL,
mips3_tlbpfn_to_paddr(tlb_lo[!idx]));
if (ptep_buddy)
pte_gpa[!idx] = *ptep_buddy;
spin_unlock(&kvm->mmu_lock);
}
/* Get the GVA page table entry pair */
ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, gva & ~PAGE_SIZE);
if (!ptep_gva) {
kvm_err("No ptep for gva %lx\n", gva);
return -1;
}
pfn0 = kvm->arch.guest_pmap[gfn0];
pfn1 = kvm->arch.guest_pmap[gfn1];
/* Copy a pair of entries from GPA page table to GVA page table */
ptep_gva[0] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[0], tlb_lo[0]);
ptep_gva[1] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[1], tlb_lo[1]);
/* Get attributes from the Guest TLB */
entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) |
((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
(tlb_lo[0] & ENTRYLO_D) |
(tlb_lo[0] & ENTRYLO_V);
entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) |
((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
(tlb_lo[1] & ENTRYLO_D) |
(tlb_lo[1] & ENTRYLO_V);
/* Invalidate this entry in the TLB, current guest mode ASID only */
kvm_mips_host_tlb_inv(vcpu, gva, !kernel, kernel);
kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
tlb->tlb_lo[0], tlb->tlb_lo[1]);
preempt_disable();
entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ?
kvm_mips_get_kernel_asid(vcpu) :
kvm_mips_get_user_asid(vcpu));
ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
tlb->tlb_mask);
preempt_enable();
return ret;
return 0;
}
void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu,
int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
struct kvm_vcpu *vcpu)
{
unsigned long asid = asid_cache(cpu);
asid += cpu_asid_inc();
if (!(asid & cpu_asid_mask(&cpu_data[cpu]))) {
if (cpu_has_vtag_icache)
flush_icache_all();
kvm_local_flush_tlb_all(); /* start new asid cycle */
kvm_pfn_t pfn;
pte_t *ptep;
if (!asid) /* fix version if needed */
asid = asid_first_version(cpu);
ptep = kvm_trap_emul_pte_for_gva(vcpu, badvaddr);
if (!ptep) {
kvm_err("No ptep for commpage %lx\n", badvaddr);
return -1;
}
cpu_context(cpu, mm) = asid_cache(cpu) = asid;
pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage));
/* Also set valid and dirty, so refill handler doesn't have to */
*ptep = pte_mkyoung(pte_mkdirty(pfn_pte(pfn, PAGE_SHARED)));
/* Invalidate this entry in the TLB, guest kernel ASID only */
kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true);
return 0;
}
/**
......@@ -235,42 +1128,13 @@ static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
/* Restore ASID once we are scheduled back after preemption */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]);
unsigned long flags;
int newasid = 0;
kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);
/* Allocate new kernel and user ASIDs if needed */
local_irq_save(flags);
if ((vcpu->arch.guest_kernel_asid[cpu] ^ asid_cache(cpu)) &
asid_version_mask(cpu)) {
kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, cpu, vcpu);
vcpu->arch.guest_kernel_asid[cpu] =
vcpu->arch.guest_kernel_mm.context.asid[cpu];
newasid++;
kvm_debug("[%d]: cpu_context: %#lx\n", cpu,
cpu_context(cpu, current->mm));
kvm_debug("[%d]: Allocated new ASID for Guest Kernel: %#x\n",
cpu, vcpu->arch.guest_kernel_asid[cpu]);
}
if ((vcpu->arch.guest_user_asid[cpu] ^ asid_cache(cpu)) &
asid_version_mask(cpu)) {
kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu);
vcpu->arch.guest_user_asid[cpu] =
vcpu->arch.guest_user_mm.context.asid[cpu];
newasid++;
kvm_debug("[%d]: cpu_context: %#lx\n", cpu,
cpu_context(cpu, current->mm));
kvm_debug("[%d]: Allocated new ASID for Guest User: %#x\n", cpu,
vcpu->arch.guest_user_asid[cpu]);
}
vcpu->cpu = cpu;
if (vcpu->arch.last_sched_cpu != cpu) {
kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
......@@ -282,42 +1146,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_mips_migrate_count(vcpu);
}
if (!newasid) {
/*
* If we preempted while the guest was executing, then reload
* the pre-empted ASID
*/
if (current->flags & PF_VCPU) {
write_c0_entryhi(vcpu->arch.
preempt_entryhi & asid_mask);
ehb();
}
} else {
/* New ASIDs were allocated for the VM */
/*
* Were we in guest context? If so then the pre-empted ASID is
* no longer valid, we need to set it to what it should be based
* on the mode of the Guest (Kernel/User)
*/
if (current->flags & PF_VCPU) {
if (KVM_GUEST_KERNEL_MODE(vcpu))
write_c0_entryhi(vcpu->arch.
guest_kernel_asid[cpu] &
asid_mask);
else
write_c0_entryhi(vcpu->arch.
guest_user_asid[cpu] &
asid_mask);
ehb();
}
}
/* restore guest state to registers */
kvm_mips_callbacks->vcpu_set_regs(vcpu);
kvm_mips_callbacks->vcpu_load(vcpu, cpu);
local_irq_restore(flags);
}
/* ASID can change if another task is scheduled during preemption */
......@@ -329,75 +1161,90 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
local_irq_save(flags);
cpu = smp_processor_id();
vcpu->arch.preempt_entryhi = read_c0_entryhi();
vcpu->arch.last_sched_cpu = cpu;
vcpu->cpu = -1;
/* save guest state in registers */
kvm_mips_callbacks->vcpu_get_regs(vcpu);
if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
asid_version_mask(cpu))) {
kvm_debug("%s: Dropping MMU Context: %#lx\n", __func__,
cpu_context(cpu, current->mm));
drop_mmu_context(current->mm, cpu);
}
write_c0_entryhi(cpu_asid(cpu, current->mm));
ehb();
kvm_mips_callbacks->vcpu_put(vcpu, cpu);
local_irq_restore(flags);
}
u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu)
/**
* kvm_trap_emul_gva_fault() - Safely attempt to handle a GVA access fault.
* @vcpu: Virtual CPU.
* @gva: Guest virtual address to be accessed.
* @write: True if write attempted (must be dirtied and made writable).
*
* Safely attempt to handle a GVA fault, mapping GVA pages if necessary, and
* dirtying the page if @write so that guest instructions can be modified.
*
* Returns: KVM_MIPS_MAPPED on success.
* KVM_MIPS_GVA if bad guest virtual address.
* KVM_MIPS_GPA if bad guest physical address.
* KVM_MIPS_TLB if guest TLB not present.
* KVM_MIPS_TLBINV if guest TLB present but not valid.
* KVM_MIPS_TLBMOD if guest TLB read only.
*/
enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
unsigned long gva,
bool write)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
unsigned long paddr, flags, vpn2, asid;
unsigned long va = (unsigned long)opc;
void *vaddr;
u32 inst;
struct kvm_mips_tlb *tlb;
int index;
if (KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0 ||
KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) {
local_irq_save(flags);
index = kvm_mips_host_tlb_lookup(vcpu, va);
if (index >= 0) {
inst = *(opc);
if (KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG0) {
if (kvm_mips_handle_kseg0_tlb_fault(gva, vcpu, write) < 0)
return KVM_MIPS_GPA;
} else if ((KVM_GUEST_KSEGX(gva) < KVM_GUEST_KSEG0) ||
KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG23) {
/* Address should be in the guest TLB */
index = kvm_mips_guest_tlb_lookup(vcpu, (gva & VPN2_MASK) |
(kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID));
if (index < 0)
return KVM_MIPS_TLB;
tlb = &vcpu->arch.guest_tlb[index];
/* Entry should be valid, and dirty for writes */
if (!TLB_IS_VALID(*tlb, gva))
return KVM_MIPS_TLBINV;
if (write && !TLB_IS_DIRTY(*tlb, gva))
return KVM_MIPS_TLBMOD;
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, gva, write))
return KVM_MIPS_GPA;
} else {
vpn2 = va & VPN2_MASK;
asid = kvm_read_c0_guest_entryhi(cop0) &
KVM_ENTRYHI_ASID;
index = kvm_mips_guest_tlb_lookup(vcpu, vpn2 | asid);
if (index < 0) {
kvm_err("%s: get_user_failed for %p, vcpu: %p, ASID: %#lx\n",
__func__, opc, vcpu, read_c0_entryhi());
kvm_mips_dump_host_tlbs();
kvm_mips_dump_guest_tlbs(vcpu);
local_irq_restore(flags);
return KVM_INVALID_INST;
}
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
&vcpu->arch.guest_tlb[index])) {
kvm_err("%s: handling mapped seg tlb fault failed for %p, index: %u, vcpu: %p, ASID: %#lx\n",
__func__, opc, index, vcpu,
read_c0_entryhi());
kvm_mips_dump_guest_tlbs(vcpu);
local_irq_restore(flags);
return KVM_INVALID_INST;
}
inst = *(opc);
return KVM_MIPS_GVA;
}
local_irq_restore(flags);
} else if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) {
paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu, va);
vaddr = kmap_atomic(pfn_to_page(PHYS_PFN(paddr)));
vaddr += paddr & ~PAGE_MASK;
inst = *(u32 *)vaddr;
kunmap_atomic(vaddr);
} else {
kvm_err("%s: illegal address: %p\n", __func__, opc);
return KVM_INVALID_INST;
return KVM_MIPS_MAPPED;
}
int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
{
int err;
retry:
kvm_trap_emul_gva_lockless_begin(vcpu);
err = get_user(*out, opc);
kvm_trap_emul_gva_lockless_end(vcpu);
if (unlikely(err)) {
/*
* Try to handle the fault, maybe we just raced with a GVA
* invalidation.
*/
err = kvm_trap_emul_gva_fault(vcpu, (unsigned long)opc,
false);
if (unlikely(err)) {
kvm_err("%s: illegal address: %p\n",
__func__, opc);
return -EFAULT;
}
return inst;
/* Hopefully it'll work now */
goto retry;
}
return 0;
}
......@@ -33,28 +33,20 @@
#define KVM_GUEST_PC_TLB 0
#define KVM_GUEST_SP_TLB 1
atomic_t kvm_mips_instance;
EXPORT_SYMBOL_GPL(kvm_mips_instance);
static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
{
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
int cpu = smp_processor_id();
return vcpu->arch.guest_kernel_asid[cpu] &
cpu_asid_mask(&cpu_data[cpu]);
return cpu_asid(cpu, kern_mm);
}
static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu)
{
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
int cpu = smp_processor_id();
return vcpu->arch.guest_user_asid[cpu] &
cpu_asid_mask(&cpu_data[cpu]);
}
inline u32 kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->arch.commpage_tlb;
return cpu_asid(cpu, user_mm);
}
/* Structure defining an tlb entry data set. */
......@@ -104,109 +96,6 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_mips_dump_guest_tlbs);
/* XXXKYMA: Must be called with interrupts disabled */
/* set flush_dcache_mask == 0 if no dcache flush required */
int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi,
unsigned long entrylo0, unsigned long entrylo1,
int flush_dcache_mask)
{
unsigned long flags;
unsigned long old_entryhi;
int idx;
local_irq_save(flags);
old_entryhi = read_c0_entryhi();
write_c0_entryhi(entryhi);
mtc0_tlbw_hazard();
tlb_probe();
tlb_probe_hazard();
idx = read_c0_index();
if (idx > current_cpu_data.tlbsize) {
kvm_err("%s: Invalid Index: %d\n", __func__, idx);
kvm_mips_dump_host_tlbs();
local_irq_restore(flags);
return -1;
}
write_c0_entrylo0(entrylo0);
write_c0_entrylo1(entrylo1);
mtc0_tlbw_hazard();
if (idx < 0)
tlb_write_random();
else
tlb_write_indexed();
tlbw_use_hazard();
kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0(R): 0x%08lx, entrylo1(R): 0x%08lx\n",
vcpu->arch.pc, idx, read_c0_entryhi(),
read_c0_entrylo0(), read_c0_entrylo1());
/* Flush D-cache */
if (flush_dcache_mask) {
if (entrylo0 & ENTRYLO_V) {
++vcpu->stat.flush_dcache_exits;
flush_data_cache_page((entryhi & VPN2_MASK) &
~flush_dcache_mask);
}
if (entrylo1 & ENTRYLO_V) {
++vcpu->stat.flush_dcache_exits;
flush_data_cache_page(((entryhi & VPN2_MASK) &
~flush_dcache_mask) |
(0x1 << PAGE_SHIFT));
}
}
/* Restore old ASID */
write_c0_entryhi(old_entryhi);
mtc0_tlbw_hazard();
local_irq_restore(flags);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_write);
int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
struct kvm_vcpu *vcpu)
{
kvm_pfn_t pfn;
unsigned long flags, old_entryhi = 0, vaddr = 0;
unsigned long entrylo[2] = { 0, 0 };
unsigned int pair_idx;
pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage));
pair_idx = (badvaddr >> PAGE_SHIFT) & 1;
entrylo[pair_idx] = mips3_paddr_to_tlbpfn(pfn << PAGE_SHIFT) |
((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
ENTRYLO_D | ENTRYLO_V;
local_irq_save(flags);
old_entryhi = read_c0_entryhi();
vaddr = badvaddr & (PAGE_MASK << 1);
write_c0_entryhi(vaddr | kvm_mips_get_kernel_asid(vcpu));
write_c0_entrylo0(entrylo[0]);
write_c0_entrylo1(entrylo[1]);
write_c0_index(kvm_mips_get_commpage_asid(vcpu));
mtc0_tlbw_hazard();
tlb_write_indexed();
tlbw_use_hazard();
kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n",
vcpu->arch.pc, read_c0_index(), read_c0_entryhi(),
read_c0_entrylo0(), read_c0_entrylo1());
/* Restore old ASID */
write_c0_entryhi(old_entryhi);
mtc0_tlbw_hazard();
local_irq_restore(flags);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_mips_handle_commpage_tlb_fault);
int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
{
int i;
......@@ -228,51 +117,11 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
}
EXPORT_SYMBOL_GPL(kvm_mips_guest_tlb_lookup);
int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
static int _kvm_mips_host_tlb_inv(unsigned long entryhi)
{
unsigned long old_entryhi, flags;
int idx;
local_irq_save(flags);
old_entryhi = read_c0_entryhi();
if (KVM_GUEST_KERNEL_MODE(vcpu))
write_c0_entryhi((vaddr & VPN2_MASK) |
kvm_mips_get_kernel_asid(vcpu));
else {
write_c0_entryhi((vaddr & VPN2_MASK) |
kvm_mips_get_user_asid(vcpu));
}
mtc0_tlbw_hazard();
tlb_probe();
tlb_probe_hazard();
idx = read_c0_index();
/* Restore old ASID */
write_c0_entryhi(old_entryhi);
mtc0_tlbw_hazard();
local_irq_restore(flags);
kvm_debug("Host TLB lookup, %#lx, idx: %2d\n", vaddr, idx);
return idx;
}
EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_lookup);
int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
{
int idx;
unsigned long flags, old_entryhi;
local_irq_save(flags);
old_entryhi = read_c0_entryhi();
write_c0_entryhi((va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu));
write_c0_entryhi(entryhi);
mtc0_tlbw_hazard();
tlb_probe();
......@@ -282,7 +131,7 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
if (idx >= current_cpu_data.tlbsize)
BUG();
if (idx > 0) {
if (idx >= 0) {
write_c0_entryhi(UNIQUE_ENTRYHI(idx));
write_c0_entrylo0(0);
write_c0_entrylo1(0);
......@@ -292,93 +141,75 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
tlbw_use_hazard();
}
write_c0_entryhi(old_entryhi);
mtc0_tlbw_hazard();
local_irq_restore(flags);
if (idx > 0)
kvm_debug("%s: Invalidated entryhi %#lx @ idx %d\n", __func__,
(va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu), idx);
return 0;
return idx;
}
EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv);
void kvm_mips_flush_host_tlb(int skip_kseg0)
int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
bool user, bool kernel)
{
unsigned long flags;
unsigned long old_entryhi, entryhi;
unsigned long old_pagemask;
int entry = 0;
int maxentry = current_cpu_data.tlbsize;
int idx_user, idx_kernel;
unsigned long flags, old_entryhi;
local_irq_save(flags);
old_entryhi = read_c0_entryhi();
old_pagemask = read_c0_pagemask();
/* Blast 'em all away. */
for (entry = 0; entry < maxentry; entry++) {
write_c0_index(entry);
if (skip_kseg0) {
mtc0_tlbr_hazard();
tlb_read();
tlb_read_hazard();
entryhi = read_c0_entryhi();
/* Don't blow away guest kernel entries */
if (KVM_GUEST_KSEGX(entryhi) == KVM_GUEST_KSEG0)
continue;
write_c0_pagemask(old_pagemask);
}
/* Make sure all entries differ. */
write_c0_entryhi(UNIQUE_ENTRYHI(entry));
write_c0_entrylo0(0);
write_c0_entrylo1(0);
mtc0_tlbw_hazard();
tlb_write_indexed();
tlbw_use_hazard();
}
if (user)
idx_user = _kvm_mips_host_tlb_inv((va & VPN2_MASK) |
kvm_mips_get_user_asid(vcpu));
if (kernel)
idx_kernel = _kvm_mips_host_tlb_inv((va & VPN2_MASK) |
kvm_mips_get_kernel_asid(vcpu));
write_c0_entryhi(old_entryhi);
write_c0_pagemask(old_pagemask);
mtc0_tlbw_hazard();
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(kvm_mips_flush_host_tlb);
void kvm_local_flush_tlb_all(void)
{
unsigned long flags;
unsigned long old_ctx;
int entry = 0;
if (user && idx_user >= 0)
kvm_debug("%s: Invalidated guest user entryhi %#lx @ idx %d\n",
__func__, (va & VPN2_MASK) |
kvm_mips_get_user_asid(vcpu), idx_user);
if (kernel && idx_kernel >= 0)
kvm_debug("%s: Invalidated guest kernel entryhi %#lx @ idx %d\n",
__func__, (va & VPN2_MASK) |
kvm_mips_get_kernel_asid(vcpu), idx_kernel);
local_irq_save(flags);
/* Save old context and create impossible VPN2 value */
old_ctx = read_c0_entryhi();
write_c0_entrylo0(0);
write_c0_entrylo1(0);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv);
/* Blast 'em all away. */
while (entry < current_cpu_data.tlbsize) {
/* Make sure all entries differ. */
write_c0_entryhi(UNIQUE_ENTRYHI(entry));
write_c0_index(entry);
mtc0_tlbw_hazard();
tlb_write_indexed();
tlbw_use_hazard();
entry++;
}
write_c0_entryhi(old_ctx);
mtc0_tlbw_hazard();
/**
* kvm_mips_suspend_mm() - Suspend the active mm.
* @cpu The CPU we're running on.
*
* Suspend the active_mm, ready for a switch to a KVM guest virtual address
* space. This is left active for the duration of guest context, including time
* with interrupts enabled, so we need to be careful not to confuse e.g. cache
* management IPIs.
*
* kvm_mips_resume_mm() should be called before context switching to a different
* process so we don't need to worry about reference counting.
*
* This needs to be in static kernel code to avoid exporting init_mm.
*/
void kvm_mips_suspend_mm(int cpu)
{
cpumask_clear_cpu(cpu, mm_cpumask(current->active_mm));
current->active_mm = &init_mm;
}
EXPORT_SYMBOL_GPL(kvm_mips_suspend_mm);
local_irq_restore(flags);
/**
* kvm_mips_resume_mm() - Resume the current process mm.
* @cpu The CPU we're running on.
*
* Resume the mm of the current process, after a switch back from a KVM guest
* virtual address space (see kvm_mips_suspend_mm()).
*/
void kvm_mips_resume_mm(int cpu)
{
cpumask_set_cpu(cpu, mm_cpumask(current->mm));
current->active_mm = current->mm;
}
EXPORT_SYMBOL_GPL(kvm_local_flush_tlb_all);
EXPORT_SYMBOL_GPL(kvm_mips_resume_mm);
......@@ -11,9 +11,11 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include "interrupt.h"
......@@ -21,9 +23,12 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
{
gpa_t gpa;
gva_t kseg = KSEGX(gva);
gva_t gkseg = KVM_GUEST_KSEGX(gva);
if ((kseg == CKSEG0) || (kseg == CKSEG1))
gpa = CPHYSADDR(gva);
else if (gkseg == KVM_GUEST_KSEG0)
gpa = KVM_GUEST_CPHYSADDR(gva);
else {
kvm_err("%s: cannot find GPA for GVA: %#lx\n", __func__, gva);
kvm_mips_dump_host_tlbs();
......@@ -83,48 +88,134 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
return ret;
}
static int kvm_mips_bad_load(u32 cause, u32 *opc, struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
enum emulation_result er;
union mips_instruction inst;
int err;
/* A code fetch fault doesn't count as an MMIO */
if (kvm_is_ifetch_fault(&vcpu->arch)) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return RESUME_HOST;
}
/* Fetch the instruction. */
if (cause & CAUSEF_BD)
opc += 1;
err = kvm_get_badinstr(opc, vcpu, &inst.word);
if (err) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return RESUME_HOST;
}
/* Emulate the load */
er = kvm_mips_emulate_load(inst, cause, run, vcpu);
if (er == EMULATE_FAIL) {
kvm_err("Emulate load from MMIO space failed\n");
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
} else {
run->exit_reason = KVM_EXIT_MMIO;
}
return RESUME_HOST;
}
static int kvm_mips_bad_store(u32 cause, u32 *opc, struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
enum emulation_result er;
union mips_instruction inst;
int err;
/* Fetch the instruction. */
if (cause & CAUSEF_BD)
opc += 1;
err = kvm_get_badinstr(opc, vcpu, &inst.word);
if (err) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return RESUME_HOST;
}
/* Emulate the store */
er = kvm_mips_emulate_store(inst, cause, run, vcpu);
if (er == EMULATE_FAIL) {
kvm_err("Emulate store to MMIO space failed\n");
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
} else {
run->exit_reason = KVM_EXIT_MMIO;
}
return RESUME_HOST;
}
static int kvm_mips_bad_access(u32 cause, u32 *opc, struct kvm_run *run,
struct kvm_vcpu *vcpu, bool store)
{
if (store)
return kvm_mips_bad_store(cause, opc, run, vcpu);
else
return kvm_mips_bad_load(cause, opc, run, vcpu);
}
static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
struct kvm_run *run = vcpu->run;
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_DONE;
int ret = RESUME_GUEST;
struct kvm_mips_tlb *tlb;
unsigned long entryhi;
int index;
if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0
|| KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) {
kvm_debug("USER/KSEG23 ADDR TLB MOD fault: cause %#x, PC: %p, BadVaddr: %#lx\n",
cause, opc, badvaddr);
er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu);
/*
* First find the mapping in the guest TLB. If the failure to
* write was due to the guest TLB, it should be up to the guest
* to handle it.
*/
entryhi = (badvaddr & VPN2_MASK) |
(kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID);
index = kvm_mips_guest_tlb_lookup(vcpu, entryhi);
if (er == EMULATE_DONE)
ret = RESUME_GUEST;
else {
/*
* These should never happen.
* They would indicate stale host TLB entries.
*/
if (unlikely(index < 0)) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
return RESUME_HOST;
}
} else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) {
tlb = vcpu->arch.guest_tlb + index;
if (unlikely(!TLB_IS_VALID(*tlb, badvaddr))) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return RESUME_HOST;
}
/*
* XXXKYMA: The guest kernel does not expect to get this fault
* when we are not using HIGHMEM. Need to address this in a
* HIGHMEM kernel
* Guest entry not dirty? That would explain the TLB modified
* exception. Relay that on to the guest so it can handle it.
*/
kvm_err("TLB MOD fault not handled, cause %#x, PC: %p, BadVaddr: %#lx\n",
cause, opc, badvaddr);
kvm_mips_dump_host_tlbs();
kvm_arch_vcpu_dump_regs(vcpu);
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
if (!TLB_IS_DIRTY(*tlb, badvaddr)) {
kvm_mips_emulate_tlbmod(cause, opc, run, vcpu);
return RESUME_GUEST;
}
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, badvaddr,
true))
/* Not writable, needs handling as MMIO */
return kvm_mips_bad_store(cause, opc, run, vcpu);
return RESUME_GUEST;
} else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) {
if (kvm_mips_handle_kseg0_tlb_fault(badvaddr, vcpu, true) < 0)
/* Not writable, needs handling as MMIO */
return kvm_mips_bad_store(cause, opc, run, vcpu);
return RESUME_GUEST;
} else {
kvm_err("Illegal TLB Mod fault address , cause %#x, PC: %p, BadVaddr: %#lx\n",
cause, opc, badvaddr);
kvm_mips_dump_host_tlbs();
kvm_arch_vcpu_dump_regs(vcpu);
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
/* host kernel addresses are all handled as MMIO */
return kvm_mips_bad_store(cause, opc, run, vcpu);
}
return ret;
}
static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store)
......@@ -157,7 +248,7 @@ static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store)
* into the shadow host TLB
*/
er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu);
er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu, store);
if (er == EMULATE_DONE)
ret = RESUME_GUEST;
else {
......@@ -169,29 +260,15 @@ static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store)
* All KSEG0 faults are handled by KVM, as the guest kernel does
* not expect to ever get them
*/
if (kvm_mips_handle_kseg0_tlb_fault
(vcpu->arch.host_cp0_badvaddr, vcpu) < 0) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
}
if (kvm_mips_handle_kseg0_tlb_fault(badvaddr, vcpu, store) < 0)
ret = kvm_mips_bad_access(cause, opc, run, vcpu, store);
} else if (KVM_GUEST_KERNEL_MODE(vcpu)
&& (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) {
/*
* With EVA we may get a TLB exception instead of an address
* error when the guest performs MMIO to KSeg1 addresses.
*/
kvm_debug("Emulate %s MMIO space\n",
store ? "Store to" : "Load from");
er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
if (er == EMULATE_FAIL) {
kvm_err("Emulate %s MMIO space failed\n",
store ? "Store to" : "Load from");
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
} else {
run->exit_reason = KVM_EXIT_MMIO;
ret = RESUME_HOST;
}
ret = kvm_mips_bad_access(cause, opc, run, vcpu, store);
} else {
kvm_err("Illegal TLB %s fault address , cause %#x, PC: %p, BadVaddr: %#lx\n",
store ? "ST" : "LD", cause, opc, badvaddr);
......@@ -219,21 +296,11 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu)
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_DONE;
int ret = RESUME_GUEST;
if (KVM_GUEST_KERNEL_MODE(vcpu)
&& (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) {
kvm_debug("Emulate Store to MMIO space\n");
er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
if (er == EMULATE_FAIL) {
kvm_err("Emulate Store to MMIO space failed\n");
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
} else {
run->exit_reason = KVM_EXIT_MMIO;
ret = RESUME_HOST;
}
ret = kvm_mips_bad_store(cause, opc, run, vcpu);
} else {
kvm_err("Address Error (STORE): cause %#x, PC: %p, BadVaddr: %#lx\n",
cause, opc, badvaddr);
......@@ -249,26 +316,15 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu)
u32 __user *opc = (u32 __user *) vcpu->arch.pc;
unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_DONE;
int ret = RESUME_GUEST;
if (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1) {
kvm_debug("Emulate Load from MMIO space @ %#lx\n", badvaddr);
er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
if (er == EMULATE_FAIL) {
kvm_err("Emulate Load from MMIO space failed\n");
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
} else {
run->exit_reason = KVM_EXIT_MMIO;
ret = RESUME_HOST;
}
ret = kvm_mips_bad_load(cause, opc, run, vcpu);
} else {
kvm_err("Address Error (LOAD): cause %#x, PC: %p, BadVaddr: %#lx\n",
cause, opc, badvaddr);
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
er = EMULATE_FAIL;
}
return ret;
}
......@@ -428,16 +484,75 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
return ret;
}
static int kvm_trap_emul_vm_init(struct kvm *kvm)
static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu)
{
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
/*
* Allocate GVA -> HPA page tables.
* MIPS doesn't use the mm_struct pointer argument.
*/
kern_mm->pgd = pgd_alloc(kern_mm);
if (!kern_mm->pgd)
return -ENOMEM;
user_mm->pgd = pgd_alloc(user_mm);
if (!user_mm->pgd) {
pgd_free(kern_mm, kern_mm->pgd);
return -ENOMEM;
}
return 0;
}
static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu)
static void kvm_mips_emul_free_gva_pt(pgd_t *pgd)
{
vcpu->arch.kscratch_enabled = 0xfc;
/* Don't free host kernel page tables copied from init_mm.pgd */
const unsigned long end = 0x80000000;
unsigned long pgd_va, pud_va, pmd_va;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
int i, j, k;
for (i = 0; i < USER_PTRS_PER_PGD; i++) {
if (pgd_none(pgd[i]))
continue;
pgd_va = (unsigned long)i << PGDIR_SHIFT;
if (pgd_va >= end)
break;
pud = pud_offset(pgd + i, 0);
for (j = 0; j < PTRS_PER_PUD; j++) {
if (pud_none(pud[j]))
continue;
return 0;
pud_va = pgd_va | ((unsigned long)j << PUD_SHIFT);
if (pud_va >= end)
break;
pmd = pmd_offset(pud + j, 0);
for (k = 0; k < PTRS_PER_PMD; k++) {
if (pmd_none(pmd[k]))
continue;
pmd_va = pud_va | (k << PMD_SHIFT);
if (pmd_va >= end)
break;
pte = pte_offset(pmd + k, 0);
pte_free_kernel(NULL, pte);
}
pmd_free(NULL, pmd);
}
pud_free(NULL, pud);
}
pgd_free(NULL, pgd);
}
static void kvm_trap_emul_vcpu_uninit(struct kvm_vcpu *vcpu)
{
kvm_mips_emul_free_gva_pt(vcpu->arch.guest_kernel_mm.pgd);
kvm_mips_emul_free_gva_pt(vcpu->arch.guest_user_mm.pgd);
}
static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
......@@ -499,6 +614,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
/* Set Wait IE/IXMT Ignore in Config7, IAR, AR */
kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10));
/* Status */
kvm_write_c0_guest_status(cop0, ST0_BEV | ST0_ERL);
/*
* Setup IntCtl defaults, compatibility mode for timer interrupts (HW5)
*/
......@@ -508,17 +626,76 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_write_c0_guest_ebase(cop0, KVM_GUEST_KSEG0 |
(vcpu_id & MIPS_EBASE_CPUNUM));
/* Put PC at guest reset vector */
vcpu->arch.pc = KVM_GUEST_CKSEG1ADDR(0x1fc00000);
return 0;
}
static void kvm_trap_emul_flush_shadow_all(struct kvm *kvm)
{
/* Flush GVA page tables and invalidate GVA ASIDs on all VCPUs */
kvm_flush_remote_tlbs(kvm);
}
static void kvm_trap_emul_flush_shadow_memslot(struct kvm *kvm,
const struct kvm_memory_slot *slot)
{
kvm_trap_emul_flush_shadow_all(kvm);
}
static u64 kvm_trap_emul_get_one_regs[] = {
KVM_REG_MIPS_CP0_INDEX,
KVM_REG_MIPS_CP0_ENTRYLO0,
KVM_REG_MIPS_CP0_ENTRYLO1,
KVM_REG_MIPS_CP0_CONTEXT,
KVM_REG_MIPS_CP0_USERLOCAL,
KVM_REG_MIPS_CP0_PAGEMASK,
KVM_REG_MIPS_CP0_WIRED,
KVM_REG_MIPS_CP0_HWRENA,
KVM_REG_MIPS_CP0_BADVADDR,
KVM_REG_MIPS_CP0_COUNT,
KVM_REG_MIPS_CP0_ENTRYHI,
KVM_REG_MIPS_CP0_COMPARE,
KVM_REG_MIPS_CP0_STATUS,
KVM_REG_MIPS_CP0_INTCTL,
KVM_REG_MIPS_CP0_CAUSE,
KVM_REG_MIPS_CP0_EPC,
KVM_REG_MIPS_CP0_PRID,
KVM_REG_MIPS_CP0_EBASE,
KVM_REG_MIPS_CP0_CONFIG,
KVM_REG_MIPS_CP0_CONFIG1,
KVM_REG_MIPS_CP0_CONFIG2,
KVM_REG_MIPS_CP0_CONFIG3,
KVM_REG_MIPS_CP0_CONFIG4,
KVM_REG_MIPS_CP0_CONFIG5,
KVM_REG_MIPS_CP0_CONFIG7,
KVM_REG_MIPS_CP0_ERROREPC,
KVM_REG_MIPS_CP0_KSCRATCH1,
KVM_REG_MIPS_CP0_KSCRATCH2,
KVM_REG_MIPS_CP0_KSCRATCH3,
KVM_REG_MIPS_CP0_KSCRATCH4,
KVM_REG_MIPS_CP0_KSCRATCH5,
KVM_REG_MIPS_CP0_KSCRATCH6,
KVM_REG_MIPS_COUNT_CTL,
KVM_REG_MIPS_COUNT_RESUME,
KVM_REG_MIPS_COUNT_HZ,
};
static unsigned long kvm_trap_emul_num_regs(struct kvm_vcpu *vcpu)
{
return 0;
return ARRAY_SIZE(kvm_trap_emul_get_one_regs);
}
static int kvm_trap_emul_copy_reg_indices(struct kvm_vcpu *vcpu,
u64 __user *indices)
{
if (copy_to_user(indices, kvm_trap_emul_get_one_regs,
sizeof(kvm_trap_emul_get_one_regs)))
return -EFAULT;
indices += ARRAY_SIZE(kvm_trap_emul_get_one_regs);
return 0;
}
......@@ -526,7 +703,81 @@ static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg,
s64 *v)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
switch (reg->id) {
case KVM_REG_MIPS_CP0_INDEX:
*v = (long)kvm_read_c0_guest_index(cop0);
break;
case KVM_REG_MIPS_CP0_ENTRYLO0:
*v = kvm_read_c0_guest_entrylo0(cop0);
break;
case KVM_REG_MIPS_CP0_ENTRYLO1:
*v = kvm_read_c0_guest_entrylo1(cop0);
break;
case KVM_REG_MIPS_CP0_CONTEXT:
*v = (long)kvm_read_c0_guest_context(cop0);
break;
case KVM_REG_MIPS_CP0_USERLOCAL:
*v = (long)kvm_read_c0_guest_userlocal(cop0);
break;
case KVM_REG_MIPS_CP0_PAGEMASK:
*v = (long)kvm_read_c0_guest_pagemask(cop0);
break;
case KVM_REG_MIPS_CP0_WIRED:
*v = (long)kvm_read_c0_guest_wired(cop0);
break;
case KVM_REG_MIPS_CP0_HWRENA:
*v = (long)kvm_read_c0_guest_hwrena(cop0);
break;
case KVM_REG_MIPS_CP0_BADVADDR:
*v = (long)kvm_read_c0_guest_badvaddr(cop0);
break;
case KVM_REG_MIPS_CP0_ENTRYHI:
*v = (long)kvm_read_c0_guest_entryhi(cop0);
break;
case KVM_REG_MIPS_CP0_COMPARE:
*v = (long)kvm_read_c0_guest_compare(cop0);
break;
case KVM_REG_MIPS_CP0_STATUS:
*v = (long)kvm_read_c0_guest_status(cop0);
break;
case KVM_REG_MIPS_CP0_INTCTL:
*v = (long)kvm_read_c0_guest_intctl(cop0);
break;
case KVM_REG_MIPS_CP0_CAUSE:
*v = (long)kvm_read_c0_guest_cause(cop0);
break;
case KVM_REG_MIPS_CP0_EPC:
*v = (long)kvm_read_c0_guest_epc(cop0);
break;
case KVM_REG_MIPS_CP0_PRID:
*v = (long)kvm_read_c0_guest_prid(cop0);
break;
case KVM_REG_MIPS_CP0_EBASE:
*v = (long)kvm_read_c0_guest_ebase(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG:
*v = (long)kvm_read_c0_guest_config(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG1:
*v = (long)kvm_read_c0_guest_config1(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG2:
*v = (long)kvm_read_c0_guest_config2(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG3:
*v = (long)kvm_read_c0_guest_config3(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG4:
*v = (long)kvm_read_c0_guest_config4(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG5:
*v = (long)kvm_read_c0_guest_config5(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG7:
*v = (long)kvm_read_c0_guest_config7(cop0);
break;
case KVM_REG_MIPS_CP0_COUNT:
*v = kvm_mips_read_count(vcpu);
break;
......@@ -539,6 +790,27 @@ static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu,
case KVM_REG_MIPS_COUNT_HZ:
*v = vcpu->arch.count_hz;
break;
case KVM_REG_MIPS_CP0_ERROREPC:
*v = (long)kvm_read_c0_guest_errorepc(cop0);
break;
case KVM_REG_MIPS_CP0_KSCRATCH1:
*v = (long)kvm_read_c0_guest_kscratch1(cop0);
break;
case KVM_REG_MIPS_CP0_KSCRATCH2:
*v = (long)kvm_read_c0_guest_kscratch2(cop0);
break;
case KVM_REG_MIPS_CP0_KSCRATCH3:
*v = (long)kvm_read_c0_guest_kscratch3(cop0);
break;
case KVM_REG_MIPS_CP0_KSCRATCH4:
*v = (long)kvm_read_c0_guest_kscratch4(cop0);
break;
case KVM_REG_MIPS_CP0_KSCRATCH5:
*v = (long)kvm_read_c0_guest_kscratch5(cop0);
break;
case KVM_REG_MIPS_CP0_KSCRATCH6:
*v = (long)kvm_read_c0_guest_kscratch6(cop0);
break;
default:
return -EINVAL;
}
......@@ -554,6 +826,56 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
unsigned int cur, change;
switch (reg->id) {
case KVM_REG_MIPS_CP0_INDEX:
kvm_write_c0_guest_index(cop0, v);
break;
case KVM_REG_MIPS_CP0_ENTRYLO0:
kvm_write_c0_guest_entrylo0(cop0, v);
break;
case KVM_REG_MIPS_CP0_ENTRYLO1:
kvm_write_c0_guest_entrylo1(cop0, v);
break;
case KVM_REG_MIPS_CP0_CONTEXT:
kvm_write_c0_guest_context(cop0, v);
break;
case KVM_REG_MIPS_CP0_USERLOCAL:
kvm_write_c0_guest_userlocal(cop0, v);
break;
case KVM_REG_MIPS_CP0_PAGEMASK:
kvm_write_c0_guest_pagemask(cop0, v);
break;
case KVM_REG_MIPS_CP0_WIRED:
kvm_write_c0_guest_wired(cop0, v);
break;
case KVM_REG_MIPS_CP0_HWRENA:
kvm_write_c0_guest_hwrena(cop0, v);
break;
case KVM_REG_MIPS_CP0_BADVADDR:
kvm_write_c0_guest_badvaddr(cop0, v);
break;
case KVM_REG_MIPS_CP0_ENTRYHI:
kvm_write_c0_guest_entryhi(cop0, v);
break;
case KVM_REG_MIPS_CP0_STATUS:
kvm_write_c0_guest_status(cop0, v);
break;
case KVM_REG_MIPS_CP0_INTCTL:
/* No VInt, so no VS, read-only for now */
break;
case KVM_REG_MIPS_CP0_EPC:
kvm_write_c0_guest_epc(cop0, v);
break;
case KVM_REG_MIPS_CP0_PRID:
kvm_write_c0_guest_prid(cop0, v);
break;
case KVM_REG_MIPS_CP0_EBASE:
/*
* Allow core number to be written, but the exception base must
* remain in guest KSeg0.
*/
kvm_change_c0_guest_ebase(cop0, 0x1ffff000 | MIPS_EBASE_CPUNUM,
v);
break;
case KVM_REG_MIPS_CP0_COUNT:
kvm_mips_write_count(vcpu, v);
break;
......@@ -618,6 +940,9 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
kvm_write_c0_guest_config5(cop0, v);
}
break;
case KVM_REG_MIPS_CP0_CONFIG7:
/* writes ignored */
break;
case KVM_REG_MIPS_COUNT_CTL:
ret = kvm_mips_set_count_ctl(vcpu, v);
break;
......@@ -627,24 +952,269 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
case KVM_REG_MIPS_COUNT_HZ:
ret = kvm_mips_set_count_hz(vcpu, v);
break;
case KVM_REG_MIPS_CP0_ERROREPC:
kvm_write_c0_guest_errorepc(cop0, v);
break;
case KVM_REG_MIPS_CP0_KSCRATCH1:
kvm_write_c0_guest_kscratch1(cop0, v);
break;
case KVM_REG_MIPS_CP0_KSCRATCH2:
kvm_write_c0_guest_kscratch2(cop0, v);
break;
case KVM_REG_MIPS_CP0_KSCRATCH3:
kvm_write_c0_guest_kscratch3(cop0, v);
break;
case KVM_REG_MIPS_CP0_KSCRATCH4:
kvm_write_c0_guest_kscratch4(cop0, v);
break;
case KVM_REG_MIPS_CP0_KSCRATCH5:
kvm_write_c0_guest_kscratch5(cop0, v);
break;
case KVM_REG_MIPS_CP0_KSCRATCH6:
kvm_write_c0_guest_kscratch6(cop0, v);
break;
default:
return -EINVAL;
}
return ret;
}
static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu)
static int kvm_trap_emul_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_lose_fpu(vcpu);
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
struct mm_struct *mm;
/*
* Were we in guest context? If so, restore the appropriate ASID based
* on the mode of the Guest (Kernel/User).
*/
if (current->flags & PF_VCPU) {
mm = KVM_GUEST_KERNEL_MODE(vcpu) ? kern_mm : user_mm;
if ((cpu_context(cpu, mm) ^ asid_cache(cpu)) &
asid_version_mask(cpu))
get_new_mmu_context(mm, cpu);
write_c0_entryhi(cpu_asid(cpu, mm));
TLBMISS_HANDLER_SETUP_PGD(mm->pgd);
kvm_mips_suspend_mm(cpu);
ehb();
}
return 0;
}
static int kvm_trap_emul_vcpu_set_regs(struct kvm_vcpu *vcpu)
static int kvm_trap_emul_vcpu_put(struct kvm_vcpu *vcpu, int cpu)
{
kvm_lose_fpu(vcpu);
if (current->flags & PF_VCPU) {
/* Restore normal Linux process memory map */
if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
asid_version_mask(cpu)))
get_new_mmu_context(current->mm, cpu);
write_c0_entryhi(cpu_asid(cpu, current->mm));
TLBMISS_HANDLER_SETUP_PGD(current->mm->pgd);
kvm_mips_resume_mm(cpu);
ehb();
}
return 0;
}
static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu,
bool reload_asid)
{
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
struct mm_struct *mm;
int i;
if (likely(!vcpu->requests))
return;
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
/*
* Both kernel & user GVA mappings must be invalidated. The
* caller is just about to check whether the ASID is stale
* anyway so no need to reload it here.
*/
kvm_mips_flush_gva_pt(kern_mm->pgd, KMF_GPA | KMF_KERN);
kvm_mips_flush_gva_pt(user_mm->pgd, KMF_GPA | KMF_USER);
for_each_possible_cpu(i) {
cpu_context(i, kern_mm) = 0;
cpu_context(i, user_mm) = 0;
}
/* Generate new ASID for current mode */
if (reload_asid) {
mm = KVM_GUEST_KERNEL_MODE(vcpu) ? kern_mm : user_mm;
get_new_mmu_context(mm, cpu);
htw_stop();
write_c0_entryhi(cpu_asid(cpu, mm));
TLBMISS_HANDLER_SETUP_PGD(mm->pgd);
htw_start();
}
}
}
/**
* kvm_trap_emul_gva_lockless_begin() - Begin lockless access to GVA space.
* @vcpu: VCPU pointer.
*
* Call before a GVA space access outside of guest mode, to ensure that
* asynchronous TLB flush requests are handled or delayed until completion of
* the GVA access (as indicated by a matching kvm_trap_emul_gva_lockless_end()).
*
* Should be called with IRQs already enabled.
*/
void kvm_trap_emul_gva_lockless_begin(struct kvm_vcpu *vcpu)
{
/* We re-enable IRQs in kvm_trap_emul_gva_lockless_end() */
WARN_ON_ONCE(irqs_disabled());
/*
* The caller is about to access the GVA space, so we set the mode to
* force TLB flush requests to send an IPI, and also disable IRQs to
* delay IPI handling until kvm_trap_emul_gva_lockless_end().
*/
local_irq_disable();
/*
* Make sure the read of VCPU requests is not reordered ahead of the
* write to vcpu->mode, or we could miss a TLB flush request while
* the requester sees the VCPU as outside of guest mode and not needing
* an IPI.
*/
smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES);
/*
* If a TLB flush has been requested (potentially while
* OUTSIDE_GUEST_MODE and assumed immediately effective), perform it
* before accessing the GVA space, and be sure to reload the ASID if
* necessary as it'll be immediately used.
*
* TLB flush requests after this check will trigger an IPI due to the
* mode change above, which will be delayed due to IRQs disabled.
*/
kvm_trap_emul_check_requests(vcpu, smp_processor_id(), true);
}
/**
* kvm_trap_emul_gva_lockless_end() - End lockless access to GVA space.
* @vcpu: VCPU pointer.
*
* Called after a GVA space access outside of guest mode. Should have a matching
* call to kvm_trap_emul_gva_lockless_begin().
*/
void kvm_trap_emul_gva_lockless_end(struct kvm_vcpu *vcpu)
{
/*
* Make sure the write to vcpu->mode is not reordered in front of GVA
* accesses, or a TLB flush requester may not think it necessary to send
* an IPI.
*/
smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE);
/*
* Now that the access to GVA space is complete, its safe for pending
* TLB flush request IPIs to be handled (which indicates completion).
*/
local_irq_enable();
}
static void kvm_trap_emul_vcpu_reenter(struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
struct mm_struct *user_mm = &vcpu->arch.guest_user_mm;
struct mm_struct *mm;
struct mips_coproc *cop0 = vcpu->arch.cop0;
int i, cpu = smp_processor_id();
unsigned int gasid;
/*
* No need to reload ASID, IRQs are disabled already so there's no rush,
* and we'll check if we need to regenerate below anyway before
* re-entering the guest.
*/
kvm_trap_emul_check_requests(vcpu, cpu, false);
if (KVM_GUEST_KERNEL_MODE(vcpu)) {
mm = kern_mm;
} else {
mm = user_mm;
/*
* Lazy host ASID regeneration / PT flush for guest user mode.
* If the guest ASID has changed since the last guest usermode
* execution, invalidate the stale TLB entries and flush GVA PT
* entries too.
*/
gasid = kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID;
if (gasid != vcpu->arch.last_user_gasid) {
kvm_mips_flush_gva_pt(user_mm->pgd, KMF_USER);
for_each_possible_cpu(i)
cpu_context(i, user_mm) = 0;
vcpu->arch.last_user_gasid = gasid;
}
}
/*
* Check if ASID is stale. This may happen due to a TLB flush request or
* a lazy user MM invalidation.
*/
if ((cpu_context(cpu, mm) ^ asid_cache(cpu)) &
asid_version_mask(cpu))
get_new_mmu_context(mm, cpu);
}
static int kvm_trap_emul_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
int cpu = smp_processor_id();
int r;
/* Check if we have any exceptions/interrupts pending */
kvm_mips_deliver_interrupts(vcpu,
kvm_read_c0_guest_cause(vcpu->arch.cop0));
kvm_trap_emul_vcpu_reenter(run, vcpu);
/*
* We use user accessors to access guest memory, but we don't want to
* invoke Linux page faulting.
*/
pagefault_disable();
/* Disable hardware page table walking while in guest */
htw_stop();
/*
* While in guest context we're in the guest's address space, not the
* host process address space, so we need to be careful not to confuse
* e.g. cache management IPIs.
*/
kvm_mips_suspend_mm(cpu);
r = vcpu->arch.vcpu_run(run, vcpu);
/* We may have migrated while handling guest exits */
cpu = smp_processor_id();
/* Restore normal Linux process memory map */
if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
asid_version_mask(cpu)))
get_new_mmu_context(current->mm, cpu);
write_c0_entryhi(cpu_asid(cpu, current->mm));
TLBMISS_HANDLER_SETUP_PGD(current->mm->pgd);
kvm_mips_resume_mm(cpu);
htw_start();
pagefault_enable();
return r;
}
static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
/* exit handlers */
.handle_cop_unusable = kvm_trap_emul_handle_cop_unusable,
......@@ -661,9 +1231,11 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
.handle_fpe = kvm_trap_emul_handle_fpe,
.handle_msa_disabled = kvm_trap_emul_handle_msa_disabled,
.vm_init = kvm_trap_emul_vm_init,
.vcpu_init = kvm_trap_emul_vcpu_init,
.vcpu_uninit = kvm_trap_emul_vcpu_uninit,
.vcpu_setup = kvm_trap_emul_vcpu_setup,
.flush_shadow_all = kvm_trap_emul_flush_shadow_all,
.flush_shadow_memslot = kvm_trap_emul_flush_shadow_memslot,
.gva_to_gpa = kvm_trap_emul_gva_to_gpa_cb,
.queue_timer_int = kvm_mips_queue_timer_int_cb,
.dequeue_timer_int = kvm_mips_dequeue_timer_int_cb,
......@@ -675,8 +1247,10 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
.copy_reg_indices = kvm_trap_emul_copy_reg_indices,
.get_one_reg = kvm_trap_emul_get_one_reg,
.set_one_reg = kvm_trap_emul_set_one_reg,
.vcpu_get_regs = kvm_trap_emul_vcpu_get_regs,
.vcpu_set_regs = kvm_trap_emul_vcpu_set_regs,
.vcpu_load = kvm_trap_emul_vcpu_load,
.vcpu_put = kvm_trap_emul_vcpu_put,
.vcpu_run = kvm_trap_emul_vcpu_run,
.vcpu_reenter = kvm_trap_emul_vcpu_reenter,
};
int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)
......
......@@ -4,7 +4,7 @@
obj-y += cache.o dma-default.o extable.o fault.o \
gup.o init.o mmap.o page.o page-funcs.o \
tlbex.o tlbex-fault.o tlb-funcs.o
pgtable.o tlbex.o tlbex-fault.o tlb-funcs.o
ifdef CONFIG_CPU_MICROMIPS
obj-y += uasm-micromips.o
......
......@@ -538,5 +538,6 @@ unsigned long pgd_current[NR_CPUS];
pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
EXPORT_SYMBOL_GPL(invalid_pmd_table);
#endif
pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
......@@ -6,6 +6,7 @@
* Copyright (C) 1999, 2000 by Silicon Graphics
* Copyright (C) 2003 by Ralf Baechle
*/
#include <linux/export.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <asm/fixmap.h>
......@@ -60,6 +61,7 @@ void pmd_init(unsigned long addr, unsigned long pagetable)
p[-1] = pagetable;
} while (p != end);
}
EXPORT_SYMBOL_GPL(pmd_init);
#endif
pmd_t mk_pmd(struct page *page, pgprot_t prot)
......
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <asm/pgalloc.h>
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *ret, *init;
ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ORDER);
if (ret) {
init = pgd_offset(&init_mm, 0UL);
pgd_init((unsigned long)ret);
memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
}
return ret;
}
EXPORT_SYMBOL_GPL(pgd_alloc);
......@@ -22,6 +22,7 @@
*/
#include <linux/bug.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
......@@ -34,6 +35,7 @@
#include <asm/war.h>
#include <asm/uasm.h>
#include <asm/setup.h>
#include <asm/tlbex.h>
static int mips_xpa_disabled;
......@@ -344,7 +346,8 @@ static int allocate_kscratch(void)
}
static int scratch_reg;
static int pgd_reg;
int pgd_reg;
EXPORT_SYMBOL_GPL(pgd_reg);
enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch};
static struct work_registers build_get_work_registers(u32 **p)
......@@ -496,13 +499,7 @@ static void __maybe_unused build_tlb_probe_entry(u32 **p)
}
}
/*
* Write random or indexed TLB entry, and care about the hazards from
* the preceding mtc0 and for the following eret.
*/
enum tlb_write_entry { tlb_random, tlb_indexed };
static void build_tlb_write_entry(u32 **p, struct uasm_label **l,
void build_tlb_write_entry(u32 **p, struct uasm_label **l,
struct uasm_reloc **r,
enum tlb_write_entry wmode)
{
......@@ -627,6 +624,7 @@ static void build_tlb_write_entry(u32 **p, struct uasm_label **l,
break;
}
}
EXPORT_SYMBOL_GPL(build_tlb_write_entry);
static __maybe_unused void build_convert_pte_to_entrylo(u32 **p,
unsigned int reg)
......@@ -781,8 +779,7 @@ static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r,
* TMP and PTR are scratch.
* TMP will be clobbered, PTR will hold the pmd entry.
*/
static void
build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
unsigned int tmp, unsigned int ptr)
{
#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
......@@ -859,6 +856,7 @@ build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
uasm_i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */
#endif
}
EXPORT_SYMBOL_GPL(build_get_pmde64);
/*
* BVADDR is the faulting address, PTR is scratch.
......@@ -934,8 +932,7 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
* TMP and PTR are scratch.
* TMP will be clobbered, PTR will hold the pgd entry.
*/
static void __maybe_unused
build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
void build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
{
if (pgd_reg != -1) {
/* pgd is in pgd_reg */
......@@ -960,6 +957,7 @@ build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
uasm_i_sll(p, tmp, tmp, PGD_T_LOG2);
uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */
}
EXPORT_SYMBOL_GPL(build_get_pgde32);
#endif /* !CONFIG_64BIT */
......@@ -989,7 +987,7 @@ static void build_adjust_context(u32 **p, unsigned int ctx)
uasm_i_andi(p, ctx, ctx, mask);
}
static void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
{
/*
* Bug workaround for the Nevada. It seems as if under certain
......@@ -1013,8 +1011,9 @@ static void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
build_adjust_context(p, tmp);
UASM_i_ADDU(p, ptr, ptr, tmp); /* add in offset */
}
EXPORT_SYMBOL_GPL(build_get_ptep);
static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep)
void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep)
{
int pte_off_even = 0;
int pte_off_odd = sizeof(pte_t);
......@@ -1063,6 +1062,7 @@ static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep)
UASM_i_MTC0(p, 0, C0_ENTRYLO1);
UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */
}
EXPORT_SYMBOL_GPL(build_update_entries);
struct mips_huge_tlb_info {
int huge_pte;
......@@ -1536,7 +1536,9 @@ static void build_loongson3_tlb_refill_handler(void)
extern u32 handle_tlbl[], handle_tlbl_end[];
extern u32 handle_tlbs[], handle_tlbs_end[];
extern u32 handle_tlbm[], handle_tlbm_end[];
extern u32 tlbmiss_handler_setup_pgd_start[], tlbmiss_handler_setup_pgd[];
extern u32 tlbmiss_handler_setup_pgd_start[];
extern u32 tlbmiss_handler_setup_pgd[];
EXPORT_SYMBOL_GPL(tlbmiss_handler_setup_pgd);
extern u32 tlbmiss_handler_setup_pgd_end[];
static void build_setup_pgd(void)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册