提交 a9f6cf96 编写于 作者: G Gleb Natapov

Merge branch 'kvm-ppc-next' of git://github.com/agraf/linux-2.6 into queue

* 'kvm-ppc-next' of git://github.com/agraf/linux-2.6:
  KVM: PPC: Book3S PR: Rework kvmppc_mmu_book3s_64_xlate()
  KVM: PPC: Book3S PR: Make instruction fetch fallback work for system calls
  KVM: PPC: Book3S PR: Don't corrupt guest state when kernel uses VMX
  KVM: PPC: Book3S: Fix compile error in XICS emulation
  KVM: PPC: Book3S PR: return appropriate error when allocation fails
  arch: powerpc: kvm: add signed type cast for comparation
  powerpc/kvm: Copy the pvr value after memset
  KVM: PPC: Book3S PR: Load up SPRG3 register with guest value on guest entry
  kvm/ppc/booke: Don't call kvm_guest_enter twice
  kvm/ppc: Call trace_hardirqs_on before entry
  KVM: PPC: Book3S HV: Allow negative offsets to real-mode hcall handlers
  KVM: PPC: Book3S HV: Correct tlbie usage
  powerpc/kvm: Use 256K chunk to track both RMA and hash page table allocation.
  powerpc/kvm: Contiguous memory allocator based RMA allocation
  powerpc/kvm: Contiguous memory allocator based hash page table allocation
  KVM: PPC: Book3S: Ignore DABR register
  mm/cma: Move dma contiguous changes into a seperate config
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#define ASMARM_DMA_CONTIGUOUS_H #define ASMARM_DMA_CONTIGUOUS_H
#ifdef __KERNEL__ #ifdef __KERNEL__
#ifdef CONFIG_CMA #ifdef CONFIG_DMA_CMA
#include <linux/types.h> #include <linux/types.h>
#include <asm-generic/dma-contiguous.h> #include <asm-generic/dma-contiguous.h>
......
...@@ -358,7 +358,7 @@ static int __init atomic_pool_init(void) ...@@ -358,7 +358,7 @@ static int __init atomic_pool_init(void)
if (!pages) if (!pages)
goto no_pages; goto no_pages;
if (IS_ENABLED(CONFIG_CMA)) if (IS_ENABLED(CONFIG_DMA_CMA))
ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page, ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page,
atomic_pool_init); atomic_pool_init);
else else
...@@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, ...@@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
addr = __alloc_simple_buffer(dev, size, gfp, &page); addr = __alloc_simple_buffer(dev, size, gfp, &page);
else if (!(gfp & __GFP_WAIT)) else if (!(gfp & __GFP_WAIT))
addr = __alloc_from_pool(size, &page); addr = __alloc_from_pool(size, &page);
else if (!IS_ENABLED(CONFIG_CMA)) else if (!IS_ENABLED(CONFIG_DMA_CMA))
addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
else else
addr = __alloc_from_contiguous(dev, size, prot, &page, caller); addr = __alloc_from_contiguous(dev, size, prot, &page, caller);
...@@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, ...@@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
__dma_free_buffer(page, size); __dma_free_buffer(page, size);
} else if (__free_from_pool(cpu_addr, size)) { } else if (__free_from_pool(cpu_addr, size)) {
return; return;
} else if (!IS_ENABLED(CONFIG_CMA)) { } else if (!IS_ENABLED(CONFIG_DMA_CMA)) {
__dma_free_remap(cpu_addr, size); __dma_free_remap(cpu_addr, size);
__dma_free_buffer(page, size); __dma_free_buffer(page, size);
} else { } else {
......
...@@ -334,6 +334,27 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) ...@@ -334,6 +334,27 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
return r; return r;
} }
/*
 * Counterpart of kvmppc_get_last_inst() for system calls.
 * An sc instruction leaves SRR0 pointing at the instruction that follows
 * it, so the sc itself has to be read from pc - 4.
 */
static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
{
	ulong sc_addr = kvmppc_get_pc(vcpu) - 4;
	struct kvmppc_book3s_shadow_vcpu *shadow = svcpu_get(vcpu);
	u32 inst;

	/*
	 * The exit path could not fetch the instruction, so load it by hand
	 * into the shadow vcpu. The result of kvmppc_ld() is not checked here.
	 */
	if (shadow->last_inst == KVM_INST_FETCH_FAILED)
		kvmppc_ld(vcpu, &sc_addr, sizeof(u32), &shadow->last_inst, false);

	/* Copy the value out before the shadow vcpu is handed back. */
	inst = shadow->last_inst;
	svcpu_put(shadow);

	return inst;
}
static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
{ {
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
...@@ -446,6 +467,23 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) ...@@ -446,6 +467,23 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
return vcpu->arch.last_inst; return vcpu->arch.last_inst;
} }
/*
 * Counterpart of kvmppc_get_last_inst() for system calls.
 * An sc instruction leaves SRR0 pointing at the instruction that follows
 * it, so the sc itself has to be read from pc - 4.
 */
static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
{
	ulong sc_addr = kvmppc_get_pc(vcpu) - 4;

	/* Fast path: the exit path already stored the instruction. */
	if (vcpu->arch.last_inst != KVM_INST_FETCH_FAILED)
		return vcpu->arch.last_inst;

	/*
	 * Otherwise load it by hand into vcpu->arch.last_inst. The result of
	 * kvmppc_ld() is not checked here.
	 */
	kvmppc_ld(vcpu, &sc_addr, sizeof(u32), &vcpu->arch.last_inst, false);

	return vcpu->arch.last_inst;
}
static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
{ {
return vcpu->arch.fault_dar; return vcpu->arch.fault_dar;
......
...@@ -37,7 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) ...@@ -37,7 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
#ifdef CONFIG_KVM_BOOK3S_64_HV #ifdef CONFIG_KVM_BOOK3S_64_HV
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
extern int kvm_hpt_order; /* order of preallocated HPTs */ extern unsigned long kvm_rma_pages;
#endif #endif
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
...@@ -100,7 +100,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, ...@@ -100,7 +100,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
/* (masks depend on page size) */ /* (masks depend on page size) */
rb |= 0x1000; /* page encoding in LP field */ rb |= 0x1000; /* page encoding in LP field */
rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */ rb |= ((va_low << 4) & 0xf0); /* AVAL field (P7 doesn't seem to care) */
} }
} else { } else {
/* 4kB page */ /* 4kB page */
......
...@@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table { ...@@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table {
struct page *pages[0]; struct page *pages[0];
}; };
struct kvmppc_linear_info { struct kvm_rma_info {
void *base_virt; atomic_t use_count;
unsigned long base_pfn; unsigned long base_pfn;
unsigned long npages;
struct list_head list;
atomic_t use_count;
int type;
}; };
/* XICS components, defined in book3s_xics.c */ /* XICS components, defined in book3s_xics.c */
...@@ -246,7 +242,7 @@ struct kvm_arch { ...@@ -246,7 +242,7 @@ struct kvm_arch {
int tlbie_lock; int tlbie_lock;
unsigned long lpcr; unsigned long lpcr;
unsigned long rmor; unsigned long rmor;
struct kvmppc_linear_info *rma; struct kvm_rma_info *rma;
unsigned long vrma_slb_v; unsigned long vrma_slb_v;
int rma_setup_done; int rma_setup_done;
int using_mmu_notifiers; int using_mmu_notifiers;
...@@ -259,7 +255,7 @@ struct kvm_arch { ...@@ -259,7 +255,7 @@ struct kvm_arch {
spinlock_t slot_phys_lock; spinlock_t slot_phys_lock;
cpumask_t need_tlb_flush; cpumask_t need_tlb_flush;
struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
struct kvmppc_linear_info *hpt_li; int hpt_cma_alloc;
#endif /* CONFIG_KVM_BOOK3S_64_HV */ #endif /* CONFIG_KVM_BOOK3S_64_HV */
#ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables; struct list_head spapr_tce_tables;
......
...@@ -137,10 +137,10 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ...@@ -137,10 +137,10 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce); unsigned long ioba, unsigned long tce);
extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
struct kvm_allocate_rma *rma); struct kvm_allocate_rma *rma);
extern struct kvmppc_linear_info *kvm_alloc_rma(void); extern struct kvm_rma_info *kvm_alloc_rma(void);
extern void kvm_release_rma(struct kvmppc_linear_info *ri); extern void kvm_release_rma(struct kvm_rma_info *ri);
extern struct kvmppc_linear_info *kvm_alloc_hpt(void); extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
extern void kvm_release_hpt(struct kvmppc_linear_info *li); extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
extern int kvmppc_core_init_vm(struct kvm *kvm); extern int kvmppc_core_init_vm(struct kvm *kvm);
extern void kvmppc_core_destroy_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm);
extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
...@@ -261,6 +261,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); ...@@ -261,6 +261,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
struct openpic; struct openpic;
#ifdef CONFIG_KVM_BOOK3S_64_HV #ifdef CONFIG_KVM_BOOK3S_64_HV
extern void kvm_cma_reserve(void) __init;
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{ {
paca[cpu].kvm_hstate.xics_phys = addr; paca[cpu].kvm_hstate.xics_phys = addr;
...@@ -281,13 +282,12 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) ...@@ -281,13 +282,12 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
} }
extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
extern void kvm_linear_init(void);
#else #else
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) static inline void __init kvm_cma_reserve(void)
{} {}
static inline void kvm_linear_init(void) static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{} {}
static inline u32 kvmppc_get_xics_latch(void) static inline u32 kvmppc_get_xics_latch(void)
...@@ -394,10 +394,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn) ...@@ -394,10 +394,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
} }
} }
/* Please call after prepare_to_enter. This function puts the lazy ee state /*
back to normal mode, without actually enabling interrupts. */ * Please call after prepare_to_enter. This function puts the lazy ee and irq
static inline void kvmppc_lazy_ee_enable(void) * disabled tracking state back to normal mode, without actually enabling
* interrupts.
*/
static inline void kvmppc_fix_ee_before_entry(void)
{ {
trace_hardirqs_on();
#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC64
/* Only need to enable IRQs by hard enabling them after this */ /* Only need to enable IRQs by hard enabling them after this */
local_paca->irq_happened = 0; local_paca->irq_happened = 0;
......
...@@ -451,6 +451,7 @@ int main(void) ...@@ -451,6 +451,7 @@ int main(void)
DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
#endif #endif
DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4)); DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5)); DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6)); DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6));
......
...@@ -229,6 +229,8 @@ void __init early_setup(unsigned long dt_ptr) ...@@ -229,6 +229,8 @@ void __init early_setup(unsigned long dt_ptr)
/* Initialize the hash table or TLB handling */ /* Initialize the hash table or TLB handling */
early_init_mmu(); early_init_mmu();
kvm_cma_reserve();
/* /*
* Reserve any gigantic pages requested on the command line. * Reserve any gigantic pages requested on the command line.
* memblock needs to have been initialized by the time this is * memblock needs to have been initialized by the time this is
...@@ -609,8 +611,6 @@ void __init setup_arch(char **cmdline_p) ...@@ -609,8 +611,6 @@ void __init setup_arch(char **cmdline_p)
/* Initialize the MMU context management stuff */ /* Initialize the MMU context management stuff */
mmu_context_init(); mmu_context_init();
kvm_linear_init();
/* Interrupt code needs to be 64K-aligned */ /* Interrupt code needs to be 64K-aligned */
if ((unsigned long)_stext & 0xffff) if ((unsigned long)_stext & 0xffff)
panic("Kernelbase not 64K-aligned (0x%lx)!\n", panic("Kernelbase not 64K-aligned (0x%lx)!\n",
......
...@@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV ...@@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV
bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
depends on KVM_BOOK3S_64 depends on KVM_BOOK3S_64
select MMU_NOTIFIER select MMU_NOTIFIER
select CMA
---help--- ---help---
Support running unmodified book3s_64 guest kernels in Support running unmodified book3s_64 guest kernels in
virtual machines on POWER7 and PPC970 processors that have virtual machines on POWER7 and PPC970 processors that have
......
...@@ -81,6 +81,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ ...@@ -81,6 +81,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_64_vio_hv.o \ book3s_64_vio_hv.o \
book3s_hv_ras.o \ book3s_hv_ras.o \
book3s_hv_builtin.o \ book3s_hv_builtin.o \
book3s_hv_cma.o \
$(kvm-book3s_64-builtin-xics-objs-y) $(kvm-book3s_64-builtin-xics-objs-y)
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
......
...@@ -182,10 +182,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, ...@@ -182,10 +182,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
hva_t ptegp; hva_t ptegp;
u64 pteg[16]; u64 pteg[16];
u64 avpn = 0; u64 avpn = 0;
u64 v, r;
u64 v_val, v_mask;
u64 eaddr_mask;
int i; int i;
u8 key = 0; u8 pp, key = 0;
bool found = false; bool found = false;
int second = 0; bool second = false;
ulong mp_ea = vcpu->arch.magic_page_ea; ulong mp_ea = vcpu->arch.magic_page_ea;
/* Magic page override */ /* Magic page override */
...@@ -208,8 +211,16 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, ...@@ -208,8 +211,16 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
goto no_seg_found; goto no_seg_found;
avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
v_val = avpn & HPTE_V_AVPN;
if (slbe->tb) if (slbe->tb)
avpn |= SLB_VSID_B_1T; v_val |= SLB_VSID_B_1T;
if (slbe->large)
v_val |= HPTE_V_LARGE;
v_val |= HPTE_V_VALID;
v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
HPTE_V_SECONDARY;
do_second: do_second:
ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
...@@ -227,91 +238,74 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, ...@@ -227,91 +238,74 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
key = 4; key = 4;
for (i=0; i<16; i+=2) { for (i=0; i<16; i+=2) {
u64 v = pteg[i]; /* Check all relevant fields of 1st dword */
u64 r = pteg[i+1]; if ((pteg[i] & v_mask) == v_val) {
/* Valid check */
if (!(v & HPTE_V_VALID))
continue;
/* Hash check */
if ((v & HPTE_V_SECONDARY) != second)
continue;
/* AVPN compare */
if (HPTE_V_COMPARE(avpn, v)) {
u8 pp = (r & HPTE_R_PP) | key;
int eaddr_mask = 0xFFF;
gpte->eaddr = eaddr;
gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu,
eaddr,
data);
if (slbe->large)
eaddr_mask = 0xFFFFFF;
gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask);
gpte->may_execute = ((r & HPTE_R_N) ? false : true);
gpte->may_read = false;
gpte->may_write = false;
switch (pp) {
case 0:
case 1:
case 2:
case 6:
gpte->may_write = true;
/* fall through */
case 3:
case 5:
case 7:
gpte->may_read = true;
break;
}
dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
"-> 0x%lx\n",
eaddr, avpn, gpte->vpage, gpte->raddr);
found = true; found = true;
break; break;
} }
} }
/* Update PTE R and C bits, so the guest's swapper knows we used the if (!found) {
* page */ if (second)
if (found) { goto no_page_found;
u32 oldr = pteg[i+1]; v_val |= HPTE_V_SECONDARY;
second = true;
goto do_second;
}
if (gpte->may_read) { v = pteg[i];
/* Set the accessed flag */ r = pteg[i+1];
pteg[i+1] |= HPTE_R_R; pp = (r & HPTE_R_PP) | key;
} eaddr_mask = 0xFFF;
if (gpte->may_write) {
/* Set the dirty flag */ gpte->eaddr = eaddr;
pteg[i+1] |= HPTE_R_C; gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
} else { if (slbe->large)
dprintk("KVM: Mapping read-only page!\n"); eaddr_mask = 0xFFFFFF;
} gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
gpte->may_execute = ((r & HPTE_R_N) ? false : true);
gpte->may_read = false;
gpte->may_write = false;
switch (pp) {
case 0:
case 1:
case 2:
case 6:
gpte->may_write = true;
/* fall through */
case 3:
case 5:
case 7:
gpte->may_read = true;
break;
}
/* Write back into the PTEG */ dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
if (pteg[i+1] != oldr) "-> 0x%lx\n",
copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); eaddr, avpn, gpte->vpage, gpte->raddr);
if (!gpte->may_read) /* Update PTE R and C bits, so the guest's swapper knows we used the
return -EPERM; * page */
return 0; if (gpte->may_read) {
} else { /* Set the accessed flag */
dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx " r |= HPTE_R_R;
"ptegp=0x%lx)\n", }
eaddr, to_book3s(vcpu)->sdr1, ptegp); if (data && gpte->may_write) {
for (i = 0; i < 16; i += 2) /* Set the dirty flag -- XXX even if not writing */
dprintk(" %02d: 0x%llx - 0x%llx (0x%llx)\n", r |= HPTE_R_C;
i, pteg[i], pteg[i+1], avpn); }
if (!second) { /* Write back into the PTEG */
second = HPTE_V_SECONDARY; if (pteg[i+1] != r) {
goto do_second; pteg[i+1] = r;
} copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
} }
if (!gpte->may_read)
return -EPERM;
return 0;
no_page_found: no_page_found:
return -ENOENT; return -ENOENT;
......
...@@ -37,6 +37,8 @@ ...@@ -37,6 +37,8 @@
#include <asm/ppc-opcode.h> #include <asm/ppc-opcode.h>
#include <asm/cputable.h> #include <asm/cputable.h>
#include "book3s_hv_cma.h"
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63 #define MAX_LPID_970 63
...@@ -52,8 +54,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) ...@@ -52,8 +54,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{ {
unsigned long hpt; unsigned long hpt;
struct revmap_entry *rev; struct revmap_entry *rev;
struct kvmppc_linear_info *li; struct page *page = NULL;
long order = kvm_hpt_order; long order = KVM_DEFAULT_HPT_ORDER;
if (htab_orderp) { if (htab_orderp) {
order = *htab_orderp; order = *htab_orderp;
...@@ -61,26 +63,23 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) ...@@ -61,26 +63,23 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
order = PPC_MIN_HPT_ORDER; order = PPC_MIN_HPT_ORDER;
} }
kvm->arch.hpt_cma_alloc = 0;
/* /*
* If the user wants a different size from default,
* try first to allocate it from the kernel page allocator. * try first to allocate it from the kernel page allocator.
* We keep the CMA reserved for failed allocation.
*/ */
hpt = 0; hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT |
if (order != kvm_hpt_order) { __GFP_NOWARN, order - PAGE_SHIFT);
hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
__GFP_NOWARN, order - PAGE_SHIFT);
if (!hpt)
--order;
}
/* Next try to allocate from the preallocated pool */ /* Next try to allocate from the preallocated pool */
if (!hpt) { if (!hpt) {
li = kvm_alloc_hpt(); VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
if (li) { page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
hpt = (ulong)li->base_virt; if (page) {
kvm->arch.hpt_li = li; hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
order = kvm_hpt_order; kvm->arch.hpt_cma_alloc = 1;
} } else
--order;
} }
/* Lastly try successively smaller sizes from the page allocator */ /* Lastly try successively smaller sizes from the page allocator */
...@@ -118,8 +117,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) ...@@ -118,8 +117,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
return 0; return 0;
out_freehpt: out_freehpt:
if (kvm->arch.hpt_li) if (kvm->arch.hpt_cma_alloc)
kvm_release_hpt(kvm->arch.hpt_li); kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
else else
free_pages(hpt, order - PAGE_SHIFT); free_pages(hpt, order - PAGE_SHIFT);
return -ENOMEM; return -ENOMEM;
...@@ -165,8 +164,9 @@ void kvmppc_free_hpt(struct kvm *kvm) ...@@ -165,8 +164,9 @@ void kvmppc_free_hpt(struct kvm *kvm)
{ {
kvmppc_free_lpid(kvm->arch.lpid); kvmppc_free_lpid(kvm->arch.lpid);
vfree(kvm->arch.revmap); vfree(kvm->arch.revmap);
if (kvm->arch.hpt_li) if (kvm->arch.hpt_cma_alloc)
kvm_release_hpt(kvm->arch.hpt_li); kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
1 << (kvm->arch.hpt_order - PAGE_SHIFT));
else else
free_pages(kvm->arch.hpt_virt, free_pages(kvm->arch.hpt_virt,
kvm->arch.hpt_order - PAGE_SHIFT); kvm->arch.hpt_order - PAGE_SHIFT);
......
...@@ -458,6 +458,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) ...@@ -458,6 +458,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_PMC4_GEKKO: case SPRN_PMC4_GEKKO:
case SPRN_WPAR_GEKKO: case SPRN_WPAR_GEKKO:
case SPRN_MSSSR0: case SPRN_MSSSR0:
case SPRN_DABR:
break; break;
unprivileged: unprivileged:
default: default:
...@@ -555,6 +556,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) ...@@ -555,6 +556,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_PMC4_GEKKO: case SPRN_PMC4_GEKKO:
case SPRN_WPAR_GEKKO: case SPRN_WPAR_GEKKO:
case SPRN_MSSSR0: case SPRN_MSSSR0:
case SPRN_DABR:
*spr_val = 0; *spr_val = 0;
break; break;
default: default:
......
...@@ -680,13 +680,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -680,13 +680,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
} }
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs) struct kvm_sregs *sregs)
{ {
int i; int i;
sregs->pvr = vcpu->arch.pvr;
memset(sregs, 0, sizeof(struct kvm_sregs)); memset(sregs, 0, sizeof(struct kvm_sregs));
sregs->pvr = vcpu->arch.pvr;
for (i = 0; i < vcpu->arch.slb_max; i++) { for (i = 0; i < vcpu->arch.slb_max; i++) {
sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige; sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
...@@ -696,7 +695,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, ...@@ -696,7 +695,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
} }
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs) struct kvm_sregs *sregs)
{ {
int i, j; int i, j;
...@@ -1511,10 +1510,10 @@ static inline int lpcr_rmls(unsigned long rma_size) ...@@ -1511,10 +1510,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
struct kvmppc_linear_info *ri = vma->vm_file->private_data;
struct page *page; struct page *page;
struct kvm_rma_info *ri = vma->vm_file->private_data;
if (vmf->pgoff >= ri->npages) if (vmf->pgoff >= kvm_rma_pages)
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
page = pfn_to_page(ri->base_pfn + vmf->pgoff); page = pfn_to_page(ri->base_pfn + vmf->pgoff);
...@@ -1536,7 +1535,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -1536,7 +1535,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
static int kvm_rma_release(struct inode *inode, struct file *filp) static int kvm_rma_release(struct inode *inode, struct file *filp)
{ {
struct kvmppc_linear_info *ri = filp->private_data; struct kvm_rma_info *ri = filp->private_data;
kvm_release_rma(ri); kvm_release_rma(ri);
return 0; return 0;
...@@ -1549,8 +1548,17 @@ static const struct file_operations kvm_rma_fops = { ...@@ -1549,8 +1548,17 @@ static const struct file_operations kvm_rma_fops = {
long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
{ {
struct kvmppc_linear_info *ri;
long fd; long fd;
struct kvm_rma_info *ri;
/*
* Only do this on PPC970 in HV mode
*/
if (!cpu_has_feature(CPU_FTR_HVMODE) ||
!cpu_has_feature(CPU_FTR_ARCH_201))
return -EINVAL;
if (!kvm_rma_pages)
return -EINVAL;
ri = kvm_alloc_rma(); ri = kvm_alloc_rma();
if (!ri) if (!ri)
...@@ -1560,7 +1568,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) ...@@ -1560,7 +1568,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
if (fd < 0) if (fd < 0)
kvm_release_rma(ri); kvm_release_rma(ri);
ret->rma_size = ri->npages << PAGE_SHIFT; ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
return fd; return fd;
} }
...@@ -1725,7 +1733,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) ...@@ -1725,7 +1733,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{ {
int err = 0; int err = 0;
struct kvm *kvm = vcpu->kvm; struct kvm *kvm = vcpu->kvm;
struct kvmppc_linear_info *ri = NULL; struct kvm_rma_info *ri = NULL;
unsigned long hva; unsigned long hva;
struct kvm_memory_slot *memslot; struct kvm_memory_slot *memslot;
struct vm_area_struct *vma; struct vm_area_struct *vma;
...@@ -1803,13 +1811,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) ...@@ -1803,13 +1811,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
} else { } else {
/* Set up to use an RMO region */ /* Set up to use an RMO region */
rma_size = ri->npages; rma_size = kvm_rma_pages;
if (rma_size > memslot->npages) if (rma_size > memslot->npages)
rma_size = memslot->npages; rma_size = memslot->npages;
rma_size <<= PAGE_SHIFT; rma_size <<= PAGE_SHIFT;
rmls = lpcr_rmls(rma_size); rmls = lpcr_rmls(rma_size);
err = -EINVAL; err = -EINVAL;
if (rmls < 0) { if ((long)rmls < 0) {
pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
goto out_srcu; goto out_srcu;
} }
...@@ -1831,14 +1839,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) ...@@ -1831,14 +1839,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
/* POWER7 */ /* POWER7 */
lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
lpcr |= rmls << LPCR_RMLS_SH; lpcr |= rmls << LPCR_RMLS_SH;
kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
} }
kvm->arch.lpcr = lpcr; kvm->arch.lpcr = lpcr;
pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
/* Initialize phys addrs of pages in RMO */ /* Initialize phys addrs of pages in RMO */
npages = ri->npages; npages = kvm_rma_pages;
porder = __ilog2(npages); porder = __ilog2(npages);
physp = memslot->arch.slot_phys; physp = memslot->arch.slot_phys;
if (physp) { if (physp) {
...@@ -1874,7 +1882,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) ...@@ -1874,7 +1882,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
/* Allocate the guest's logical partition ID */ /* Allocate the guest's logical partition ID */
lpid = kvmppc_alloc_lpid(); lpid = kvmppc_alloc_lpid();
if (lpid < 0) if ((long)lpid < 0)
return -ENOMEM; return -ENOMEM;
kvm->arch.lpid = lpid; kvm->arch.lpid = lpid;
......
...@@ -13,33 +13,34 @@ ...@@ -13,33 +13,34 @@
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/bootmem.h> #include <linux/bootmem.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/memblock.h>
#include <linux/sizes.h>
#include <asm/cputable.h> #include <asm/cputable.h>
#include <asm/kvm_ppc.h> #include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h> #include <asm/kvm_book3s.h>
#define KVM_LINEAR_RMA 0 #include "book3s_hv_cma.h"
#define KVM_LINEAR_HPT 1 /*
* Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
static void __init kvm_linear_init_one(ulong size, int count, int type); * should be power of 2.
static struct kvmppc_linear_info *kvm_alloc_linear(int type); */
static void kvm_release_linear(struct kvmppc_linear_info *ri); #define HPT_ALIGN_PAGES ((1 << 18) >> PAGE_SHIFT) /* 256k */
/*
int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER; * By default we reserve 5% of memory for hash pagetable allocation.
EXPORT_SYMBOL_GPL(kvm_hpt_order); */
static unsigned long kvm_cma_resv_ratio = 5;
/*************** RMA *************/
/* /*
* This maintains a list of RMAs (real mode areas) for KVM guests to use. * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
* Each RMA has to be physically contiguous and of a size that the * Each RMA has to be physically contiguous and of a size that the
* hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
* and other larger sizes. Since we are unlikely to be allocate that * and other larger sizes. Since we are unlikely to be allocate that
* much physically contiguous memory after the system is up and running, * much physically contiguous memory after the system is up and running,
* we preallocate a set of RMAs in early boot for KVM to use. * we preallocate a set of RMAs in early boot using CMA.
* should be power of 2.
*/ */
static unsigned long kvm_rma_size = 64 << 20; /* 64MB */ unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
static unsigned long kvm_rma_count; EXPORT_SYMBOL_GPL(kvm_rma_pages);
/* Work out RMLS (real mode limit selector) field value for a given RMA size. /* Work out RMLS (real mode limit selector) field value for a given RMA size.
Assumes POWER7 or PPC970. */ Assumes POWER7 or PPC970. */
...@@ -69,165 +70,114 @@ static inline int lpcr_rmls(unsigned long rma_size) ...@@ -69,165 +70,114 @@ static inline int lpcr_rmls(unsigned long rma_size)
static int __init early_parse_rma_size(char *p) static int __init early_parse_rma_size(char *p)
{ {
if (!p) unsigned long kvm_rma_size;
return 1;
pr_debug("%s(%s)\n", __func__, p);
if (!p)
return -EINVAL;
kvm_rma_size = memparse(p, &p); kvm_rma_size = memparse(p, &p);
/*
* Check that the requested size is one supported in hardware
*/
if (lpcr_rmls(kvm_rma_size) < 0) {
pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
return -EINVAL;
}
kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
return 0; return 0;
} }
early_param("kvm_rma_size", early_parse_rma_size); early_param("kvm_rma_size", early_parse_rma_size);
static int __init early_parse_rma_count(char *p) struct kvm_rma_info *kvm_alloc_rma()
{ {
if (!p) struct page *page;
return 1; struct kvm_rma_info *ri;
kvm_rma_count = simple_strtoul(p, NULL, 0); ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
if (!ri)
return 0; return NULL;
} page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
early_param("kvm_rma_count", early_parse_rma_count); if (!page)
goto err_out;
struct kvmppc_linear_info *kvm_alloc_rma(void) atomic_set(&ri->use_count, 1);
{ ri->base_pfn = page_to_pfn(page);
return kvm_alloc_linear(KVM_LINEAR_RMA); return ri;
err_out:
kfree(ri);
return NULL;
} }
EXPORT_SYMBOL_GPL(kvm_alloc_rma); EXPORT_SYMBOL_GPL(kvm_alloc_rma);
void kvm_release_rma(struct kvmppc_linear_info *ri) void kvm_release_rma(struct kvm_rma_info *ri)
{ {
kvm_release_linear(ri); if (atomic_dec_and_test(&ri->use_count)) {
kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
kfree(ri);
}
} }
EXPORT_SYMBOL_GPL(kvm_release_rma); EXPORT_SYMBOL_GPL(kvm_release_rma);
/*************** HPT *************/ static int __init early_parse_kvm_cma_resv(char *p)
/*
* This maintains a list of big linear HPT tables that contain the GVA->HPA
* memory mappings. If we don't reserve those early on, we might not be able
* to get a big (usually 16MB) linear memory region from the kernel anymore.
*/
static unsigned long kvm_hpt_count;
static int __init early_parse_hpt_count(char *p)
{ {
pr_debug("%s(%s)\n", __func__, p);
if (!p) if (!p)
return 1; return -EINVAL;
return kstrtoul(p, 0, &kvm_cma_resv_ratio);
kvm_hpt_count = simple_strtoul(p, NULL, 0);
return 0;
} }
early_param("kvm_hpt_count", early_parse_hpt_count); early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
struct kvmppc_linear_info *kvm_alloc_hpt(void) struct page *kvm_alloc_hpt(unsigned long nr_pages)
{ {
return kvm_alloc_linear(KVM_LINEAR_HPT); unsigned long align_pages = HPT_ALIGN_PAGES;
/* Old CPUs require HPT aligned on a multiple of its size */
if (!cpu_has_feature(CPU_FTR_ARCH_206))
align_pages = nr_pages;
return kvm_alloc_cma(nr_pages, align_pages);
} }
EXPORT_SYMBOL_GPL(kvm_alloc_hpt); EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
void kvm_release_hpt(struct kvmppc_linear_info *li) void kvm_release_hpt(struct page *page, unsigned long nr_pages)
{ {
kvm_release_linear(li); kvm_release_cma(page, nr_pages);
} }
EXPORT_SYMBOL_GPL(kvm_release_hpt); EXPORT_SYMBOL_GPL(kvm_release_hpt);
/*************** generic *************/ /**
* kvm_cma_reserve() - reserve area for kvm hash pagetable
static LIST_HEAD(free_linears); *
static DEFINE_SPINLOCK(linear_lock); * This function reserves memory from early allocator. It should be
* called by arch specific code once the early allocator (memblock or bootmem)
static void __init kvm_linear_init_one(ulong size, int count, int type) * has been activated and all other subsystems have already allocated/reserved
{ * memory.
unsigned long i;
unsigned long j, npages;
void *linear;
struct page *pg;
const char *typestr;
struct kvmppc_linear_info *linear_info;
if (!count)
return;
typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
npages = size >> PAGE_SHIFT;
linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
for (i = 0; i < count; ++i) {
linear = alloc_bootmem_align(size, size);
pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
size >> 20);
linear_info[i].base_virt = linear;
linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
linear_info[i].npages = npages;
linear_info[i].type = type;
list_add_tail(&linear_info[i].list, &free_linears);
atomic_set(&linear_info[i].use_count, 0);
pg = pfn_to_page(linear_info[i].base_pfn);
for (j = 0; j < npages; ++j) {
atomic_inc(&pg->_count);
++pg;
}
}
}
static struct kvmppc_linear_info *kvm_alloc_linear(int type)
{
struct kvmppc_linear_info *ri, *ret;
ret = NULL;
spin_lock(&linear_lock);
list_for_each_entry(ri, &free_linears, list) {
if (ri->type != type)
continue;
list_del(&ri->list);
atomic_inc(&ri->use_count);
memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
ret = ri;
break;
}
spin_unlock(&linear_lock);
return ret;
}
static void kvm_release_linear(struct kvmppc_linear_info *ri)
{
if (atomic_dec_and_test(&ri->use_count)) {
spin_lock(&linear_lock);
list_add_tail(&ri->list, &free_linears);
spin_unlock(&linear_lock);
}
}
/*
* Called at boot time while the bootmem allocator is active,
* to allocate contiguous physical memory for the hash page
* tables for guests.
*/ */
void __init kvm_linear_init(void) void __init kvm_cma_reserve(void)
{ {
/* HPT */ unsigned long align_size;
kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT); struct memblock_region *reg;
phys_addr_t selected_size = 0;
/* RMA */ /*
/* Only do this on PPC970 in HV mode */ * We cannot use memblock_phys_mem_size() here, because
if (!cpu_has_feature(CPU_FTR_HVMODE) || * memblock_analyze() has not been called yet.
!cpu_has_feature(CPU_FTR_ARCH_201)) */
return; for_each_memblock(memory, reg)
selected_size += memblock_region_memory_end_pfn(reg) -
if (!kvm_rma_size || !kvm_rma_count) memblock_region_memory_base_pfn(reg);
return;
selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
/* Check that the requested size is one supported in hardware */ if (selected_size) {
if (lpcr_rmls(kvm_rma_size) < 0) { pr_debug("%s: reserving %ld MiB for global area\n", __func__,
pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); (unsigned long)selected_size / SZ_1M);
return; /*
* Old CPUs require HPT aligned on a multiple of its size. So for them
* make the alignment as max size we could request.
*/
if (!cpu_has_feature(CPU_FTR_ARCH_206))
align_size = __rounddown_pow_of_two(selected_size);
else
align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
kvm_cma_declare_contiguous(selected_size, align_size);
} }
kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
} }
/*
* Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
* for DMA mapping framework
*
* Copyright IBM Corporation, 2013
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License or (at your option) any later version of the license.
*
*/
#define pr_fmt(fmt) "kvm_cma: " fmt
#ifdef CONFIG_CMA_DEBUG
#ifndef DEBUG
# define DEBUG
#endif
#endif
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include "book3s_hv_cma.h"
/*
 * Descriptor of the single contiguous region reserved for KVM.
 * base_pfn and count are filled in by kvm_cma_declare_contiguous();
 * bitmap is allocated later by kvm_cma_init_reserved_areas().
 */
struct kvm_cma {
	unsigned long base_pfn;	/* first page frame number of the region */
	unsigned long count;	/* size of the region, in pages */
	unsigned long *bitmap;	/* one bit per (1 << KVM_CMA_CHUNK_ORDER)-byte chunk; set = in use */
};
/* Held around bitmap updates in kvm_alloc_cma() and kvm_release_cma(). */
static DEFINE_MUTEX(kvm_cma_mutex);
/* The one and only KVM CMA region; count == 0 means nothing was reserved. */
static struct kvm_cma kvm_cma_area;
/**
 * kvm_cma_declare_contiguous() - reserve the contiguous region used by KVM
 * @size: Requested size of the region in bytes.
 * @alignment: Requested alignment of the region in bytes.
 *
 * Must be called by arch code while the early allocator (memblock or
 * bootmem) is still active.  The alignment is raised to at least one
 * pageblock and the size is rounded up to that alignment before the
 * memory is reserved.  Only the location and size are recorded here; the
 * region is initialised later, once more kernel subsystems (such as the
 * slab allocator) are available.
 *
 * Return: 0 on success, -EINVAL if @size is zero, -ENOMEM if the
 * reservation fails.
 */
long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
{
	struct kvm_cma *area = &kvm_cma_area;
	phys_addr_t pageblock_bytes = (phys_addr_t)(PAGE_SIZE << pageblock_order);
	phys_addr_t base;

	pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);

	if (!size)
		return -EINVAL;

	/* CMA needs the region to be pageblock aligned. */
	alignment = max(alignment, pageblock_bytes);
	size = ALIGN(size, alignment);

	/*
	 * __memblock_alloc_base() is used on purpose: memblock_alloc_base()
	 * panic()s on failure, whereas here a failure is just reported.
	 */
	base = __memblock_alloc_base(size, alignment, 0);
	if (!base) {
		pr_err("CMA: failed to reserve %ld MiB\n",
		       (unsigned long)size / SZ_1M);
		return -ENOMEM;
	}

	area->base_pfn = PFN_DOWN(base);
	area->count = size >> PAGE_SHIFT;

	pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
	return 0;
}
/**
 * kvm_alloc_cma() - allocate zeroed pages from the KVM contiguous area
 * @nr_pages: Requested number of pages.
 * @align_pages: Requested alignment in number of pages (power of two).
 *
 * The area is tracked in chunks of (1 << KVM_CMA_CHUNK_ORDER) bytes, so
 * both arguments are converted to chunk units before the bitmap is
 * searched.  NOTE(review): this presumably relies on callers passing
 * multiples of the chunk size -- confirm against the call sites.
 *
 * Return: the first page of the allocated range, or NULL if the area is
 * empty, @nr_pages is zero, no suitable free range exists, or
 * alloc_contig_range() fails with anything other than -EBUSY.
 */
struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
{
	const unsigned int chunk_shift = KVM_CMA_CHUNK_ORDER - PAGE_SHIFT;
	struct kvm_cma *area = &kvm_cma_area;
	struct page *found = NULL;
	unsigned long total_chunks, want_chunks, align_mask;
	unsigned long slot, first_pfn, search_from = 0;
	int rc;

	if (!area->count)
		return NULL;

	pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
		 (void *)area, nr_pages, align_pages);

	if (!nr_pages)
		return NULL;

	BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
	VM_BUG_ON(!is_power_of_2(align_pages));

	/* Everything below is expressed in chunks, one bitmap bit each. */
	align_mask = (align_pages >> chunk_shift) - 1;
	total_chunks = area->count >> chunk_shift;
	want_chunks = nr_pages >> chunk_shift;

	mutex_lock(&kvm_cma_mutex);
	while ((slot = bitmap_find_next_zero_area(area->bitmap, total_chunks,
						  search_from, want_chunks,
						  align_mask)) < total_chunks) {
		first_pfn = area->base_pfn + (slot << chunk_shift);
		rc = alloc_contig_range(first_pfn, first_pfn + nr_pages,
					MIGRATE_CMA);
		if (!rc) {
			/* Got the range: mark it used and hand it out zeroed. */
			bitmap_set(area->bitmap, slot, want_chunks);
			found = pfn_to_page(first_pfn);
			memset(pfn_to_kaddr(first_pfn), 0,
			       nr_pages << PAGE_SHIFT);
			break;
		}
		if (rc != -EBUSY)
			break;

		pr_debug("%s(): memory range at %p is busy, retrying\n",
			 __func__, pfn_to_page(first_pfn));
		/* try again with a bit different memory target */
		search_from = slot + align_mask + 1;
	}
	mutex_unlock(&kvm_cma_mutex);

	pr_debug("%s(): returned %p\n", __func__, found);
	return found;
}
/**
 * kvm_release_cma() - give pages back to the KVM contiguous area
 * @pages: First page of a range obtained from kvm_alloc_cma().
 * @nr_pages: Number of pages in the range.
 *
 * Clears the chunks covering the range in the allocation bitmap and frees
 * the underlying pages.
 *
 * Return: false if @pages is NULL or does not start inside the KVM
 * contiguous area, true otherwise.
 */
bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
{
	const unsigned int chunk_shift = KVM_CMA_CHUNK_ORDER - PAGE_SHIFT;
	struct kvm_cma *area = &kvm_cma_area;
	unsigned long first_pfn, area_end_pfn;

	if (!pages)
		return false;

	pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);

	first_pfn = page_to_pfn(pages);
	area_end_pfn = area->base_pfn + area->count;

	if (first_pfn < area->base_pfn || first_pfn >= area_end_pfn)
		return false;

	/* A range that starts inside the area must also end inside it. */
	VM_BUG_ON(first_pfn + nr_pages > area_end_pfn);

	mutex_lock(&kvm_cma_mutex);
	bitmap_clear(area->bitmap,
		     (first_pfn - area->base_pfn) >> chunk_shift,
		     nr_pages >> chunk_shift);
	free_contig_range(first_pfn, nr_pages);
	mutex_unlock(&kvm_cma_mutex);

	return true;
}
/*
 * Hand the reserved range over to the page allocator one pageblock at a
 * time via init_cma_reserved_pageblock().
 *
 * @base_pfn: first page frame of the reserved range
 * @count:    number of pages in the range
 *
 * Returns 0 on success, or -EINVAL as soon as a page is found that lies in
 * a different zone than the first page of the range; pageblocks processed
 * before that point have already been handed over.
 */
static int __init kvm_cma_activate_area(unsigned long base_pfn,
					unsigned long count)
{
	unsigned long cur_pfn = base_pfn;
	unsigned blocks_left = count >> pageblock_order;
	struct zone *first_zone;

	WARN_ON_ONCE(!pfn_valid(cur_pfn));
	first_zone = page_zone(pfn_to_page(cur_pfn));

	do {
		unsigned long block_start = cur_pfn;
		unsigned long block_end = cur_pfn + pageblock_nr_pages;

		for (; cur_pfn < block_end; cur_pfn++) {
			WARN_ON_ONCE(!pfn_valid(cur_pfn));
			/*
			 * alloc_contig_range requires the pfn range
			 * specified to be in the same zone. Make this
			 * simple by forcing the entire CMA resv range
			 * to be in the same zone.
			 */
			if (page_zone(pfn_to_page(cur_pfn)) != first_zone)
				return -EINVAL;
		}
		init_cma_reserved_pageblock(pfn_to_page(block_start));
	} while (--blocks_left);

	return 0;
}
/*
 * Finish setting up the region recorded by kvm_cma_declare_contiguous():
 * allocate the chunk bitmap and activate the reserved pageblocks.
 *
 * Returns 0 on success or when nothing was reserved, -ENOMEM if the bitmap
 * cannot be allocated, or the error from kvm_cma_activate_area().
 *
 * On any failure the area is marked empty (count = 0, bitmap = NULL).
 * kvm_alloc_cma() only checks cma->count before touching cma->bitmap, so
 * leaving count set would let it dereference a NULL or already-freed
 * bitmap.
 */
static int __init kvm_cma_init_reserved_areas(void)
{
	int ret;
	size_t bitmap_size;
	unsigned long chunk_count;
	struct kvm_cma *cma = &kvm_cma_area;

	pr_debug("%s()\n", __func__);
	if (!cma->count)
		return 0;

	/* One bit per (1 << KVM_CMA_CHUNK_ORDER)-byte chunk of the region. */
	chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
	bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);

	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
	if (!cma->bitmap) {
		ret = -ENOMEM;
		goto disable_area;
	}

	ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
	if (ret)
		goto free_bitmap;

	return 0;

free_bitmap:
	kfree(cma->bitmap);
	cma->bitmap = NULL;
disable_area:
	/* Make kvm_alloc_cma()/kvm_release_cma() treat the area as absent. */
	cma->count = 0;
	return ret;
}
core_initcall(kvm_cma_init_reserved_areas);
/*
* Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
* for DMA mapping framework
*
* Copyright IBM Corporation, 2013
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License or (at your option) any later version of the license.
*
*/
#ifndef __POWERPC_KVM_CMA_ALLOC_H__
#define __POWERPC_KVM_CMA_ALLOC_H__
/*
* Both RMA and Hash page allocation will be multiple of 256K.
*/
#define KVM_CMA_CHUNK_ORDER 18
extern struct page *kvm_alloc_cma(unsigned long nr_pages,
unsigned long align_pages);
extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
extern long kvm_cma_declare_contiguous(phys_addr_t size,
phys_addr_t alignment) __init;
#endif
...@@ -383,6 +383,80 @@ static inline int try_lock_tlbie(unsigned int *lock) ...@@ -383,6 +383,80 @@ static inline int try_lock_tlbie(unsigned int *lock)
return old == 0; return old == 0;
} }
/*
 * TLB invalidation for PPC970.
 *
 * tlbie/tlbiel is a bit different on the PPC970 compared to later
 * processors such as POWER7; the large page bit is in the instruction
 * not RB, and the top 16 bits and the bottom 12 bits of the VA
 * in RB must be 0.
 *
 * @rbvalues:  array of @npages RB values; bit 0 set selects the
 *             large-page form of the instruction
 * @global:    non-zero to use tlbie under kvm->arch.tlbie_lock,
 *             zero to use tlbiel without taking the lock
 * @need_sync: issue a ptesync before the invalidations
 */
static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
			  long npages, int global, bool need_sync)
{
	long n;

	if (!global) {
		if (need_sync)
			asm volatile("ptesync" : : : "memory");
		for (n = 0; n < npages; ++n) {
			unsigned long rb = rbvalues[n];
			unsigned long va = rb & 0x0000fffffffff000ul;

			if (rb & 1)	/* large page */
				asm volatile("tlbiel %0,1" : : "r" (va));
			else
				asm volatile("tlbiel %0,0" : : "r" (va));
		}
		asm volatile("ptesync" : : : "memory");
		return;
	}

	while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
		cpu_relax();
	if (need_sync)
		asm volatile("ptesync" : : : "memory");
	for (n = 0; n < npages; ++n) {
		unsigned long rb = rbvalues[n];
		unsigned long va = rb & 0x0000fffffffff000ul;

		if (rb & 1)	/* large page */
			asm volatile("tlbie %0,1" : : "r" (va));
		else
			asm volatile("tlbie %0,0" : : "r" (va));
	}
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
	kvm->arch.tlbie_lock = 0;
}
/*
 * Invalidate @npages TLB entries described by @rbvalues.
 *
 * CPUs with CPU_FTR_ARCH_201 are handed off to do_tlbies_970().  Otherwise
 * a global invalidation uses tlbie with the guest's LPID while holding
 * kvm->arch.tlbie_lock, and a local one uses tlbiel without the lock.
 * When @need_sync is set a ptesync is issued before the invalidations; a
 * closing ptesync is always issued.
 */
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
		      long npages, int global, bool need_sync)
{
	long n;

	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
		/* PPC970 tlbie instruction is a bit different */
		do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
		return;
	}

	if (!global) {
		if (need_sync)
			asm volatile("ptesync" : : : "memory");
		for (n = 0; n < npages; ++n)
			asm volatile("tlbiel %0" : : "r" (rbvalues[n]));
		asm volatile("ptesync" : : : "memory");
		return;
	}

	while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
		cpu_relax();
	if (need_sync)
		asm volatile("ptesync" : : : "memory");
	for (n = 0; n < npages; ++n)
		asm volatile(PPC_TLBIE(%1,%0) : :
			     "r" (rbvalues[n]), "r" (kvm->arch.lpid));
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
	kvm->arch.tlbie_lock = 0;
}
long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
unsigned long pte_index, unsigned long avpn, unsigned long pte_index, unsigned long avpn,
unsigned long *hpret) unsigned long *hpret)
...@@ -408,19 +482,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, ...@@ -408,19 +482,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
if (v & HPTE_V_VALID) { if (v & HPTE_V_VALID) {
hpte[0] &= ~HPTE_V_VALID; hpte[0] &= ~HPTE_V_VALID;
rb = compute_tlbie_rb(v, hpte[1], pte_index); rb = compute_tlbie_rb(v, hpte[1], pte_index);
if (global_invalidates(kvm, flags)) { do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
: : "r" (rb), "r" (kvm->arch.lpid));
asm volatile("ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} else {
asm volatile("ptesync" : : : "memory");
asm volatile("tlbiel %0" : : "r" (rb));
asm volatile("ptesync" : : : "memory");
}
/* Read PTE low word after tlbie to get final R/C values */ /* Read PTE low word after tlbie to get final R/C values */
remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
} }
...@@ -448,12 +510,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) ...@@ -448,12 +510,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
unsigned long *hp, *hptes[4], tlbrb[4]; unsigned long *hp, *hptes[4], tlbrb[4];
long int i, j, k, n, found, indexes[4]; long int i, j, k, n, found, indexes[4];
unsigned long flags, req, pte_index, rcbits; unsigned long flags, req, pte_index, rcbits;
long int local = 0; int global;
long int ret = H_SUCCESS; long int ret = H_SUCCESS;
struct revmap_entry *rev, *revs[4]; struct revmap_entry *rev, *revs[4];
if (atomic_read(&kvm->online_vcpus) == 1) global = global_invalidates(kvm, 0);
local = 1;
for (i = 0; i < 4 && ret == H_SUCCESS; ) { for (i = 0; i < 4 && ret == H_SUCCESS; ) {
n = 0; n = 0;
for (; i < 4; ++i) { for (; i < 4; ++i) {
...@@ -529,22 +590,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) ...@@ -529,22 +590,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
break; break;
/* Now that we've collected a batch, do the tlbies */ /* Now that we've collected a batch, do the tlbies */
if (!local) { do_tlbies(kvm, tlbrb, n, global, true);
while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
for (k = 0; k < n; ++k)
asm volatile(PPC_TLBIE(%1,%0) : :
"r" (tlbrb[k]),
"r" (kvm->arch.lpid));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} else {
asm volatile("ptesync" : : : "memory");
for (k = 0; k < n; ++k)
asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
asm volatile("ptesync" : : : "memory");
}
/* Read PTE low words after tlbie to get final R/C values */ /* Read PTE low words after tlbie to get final R/C values */
for (k = 0; k < n; ++k) { for (k = 0; k < n; ++k) {
...@@ -603,19 +649,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, ...@@ -603,19 +649,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
if (v & HPTE_V_VALID) { if (v & HPTE_V_VALID) {
rb = compute_tlbie_rb(v, r, pte_index); rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = v & ~HPTE_V_VALID; hpte[0] = v & ~HPTE_V_VALID;
if (global_invalidates(kvm, flags)) { do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
: : "r" (rb), "r" (kvm->arch.lpid));
asm volatile("ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} else {
asm volatile("ptesync" : : : "memory");
asm volatile("tlbiel %0" : : "r" (rb));
asm volatile("ptesync" : : : "memory");
}
/* /*
* If the host has this page as readonly but the guest * If the host has this page as readonly but the guest
* wants to make it read/write, reduce the permissions. * wants to make it read/write, reduce the permissions.
...@@ -686,13 +720,7 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, ...@@ -686,13 +720,7 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
hptep[0] &= ~HPTE_V_VALID; hptep[0] &= ~HPTE_V_VALID;
rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) do_tlbies(kvm, &rb, 1, 1, true);
cpu_relax();
asm volatile("ptesync" : : : "memory");
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
: : "r" (rb), "r" (kvm->arch.lpid));
asm volatile("ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} }
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
...@@ -706,12 +734,7 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, ...@@ -706,12 +734,7 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
rbyte = (hptep[1] & ~HPTE_R_R) >> 8; rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
/* modify only the second-last byte, which contains the ref bit */ /* modify only the second-last byte, which contains the ref bit */
*((char *)hptep + 14) = rbyte; *((char *)hptep + 14) = rbyte;
while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) do_tlbies(kvm, &rb, 1, 1, false);
cpu_relax();
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
: : "r" (rb), "r" (kvm->arch.lpid));
asm volatile("ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} }
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
......
...@@ -1381,7 +1381,7 @@ hcall_try_real_mode: ...@@ -1381,7 +1381,7 @@ hcall_try_real_mode:
cmpldi r3,hcall_real_table_end - hcall_real_table cmpldi r3,hcall_real_table_end - hcall_real_table
bge guest_exit_cont bge guest_exit_cont
LOAD_REG_ADDR(r4, hcall_real_table) LOAD_REG_ADDR(r4, hcall_real_table)
lwzx r3,r3,r4 lwax r3,r3,r4
cmpwi r3,0 cmpwi r3,0
beq guest_exit_cont beq guest_exit_cont
add r3,r3,r4 add r3,r3,r4
......
...@@ -92,6 +92,11 @@ kvm_start_lightweight: ...@@ -92,6 +92,11 @@ kvm_start_lightweight:
PPC_LL r3, VCPU_HFLAGS(r4) PPC_LL r3, VCPU_HFLAGS(r4)
rldicl r3, r3, 0, 63 /* r3 &= 1 */ rldicl r3, r3, 0, 63 /* r3 &= 1 */
stb r3, HSTATE_RESTORE_HID5(r13) stb r3, HSTATE_RESTORE_HID5(r13)
/* Load up guest SPRG3 value, since it's user readable */
ld r3, VCPU_SHARED(r4)
ld r3, VCPU_SHARED_SPRG3(r3)
mtspr SPRN_SPRG3, r3
#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_BOOK3S_64 */
PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */
...@@ -123,6 +128,15 @@ kvmppc_handler_highmem: ...@@ -123,6 +128,15 @@ kvmppc_handler_highmem:
/* R7 = vcpu */ /* R7 = vcpu */
PPC_LL r7, GPR4(r1) PPC_LL r7, GPR4(r1)
#ifdef CONFIG_PPC_BOOK3S_64
/*
* Reload kernel SPRG3 value.
* No need to save guest value as usermode can't modify SPRG3.
*/
ld r3, PACA_SPRG3(r13)
mtspr SPRN_SPRG3, r3
#endif /* CONFIG_PPC_BOOK3S_64 */
PPC_STL r14, VCPU_GPR(R14)(r7) PPC_STL r14, VCPU_GPR(R14)(r7)
PPC_STL r15, VCPU_GPR(R15)(r7) PPC_STL r15, VCPU_GPR(R15)(r7)
PPC_STL r16, VCPU_GPR(R16)(r7) PPC_STL r16, VCPU_GPR(R16)(r7)
......
...@@ -468,7 +468,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) ...@@ -468,7 +468,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
* both the traditional FP registers and the added VSX * both the traditional FP registers and the added VSX
* registers into thread.fpr[]. * registers into thread.fpr[].
*/ */
giveup_fpu(current); if (current->thread.regs->msr & MSR_FP)
giveup_fpu(current);
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
...@@ -483,7 +484,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) ...@@ -483,7 +484,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
#ifdef CONFIG_ALTIVEC #ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) { if (msr & MSR_VEC) {
giveup_altivec(current); if (current->thread.regs->msr & MSR_VEC)
giveup_altivec(current);
memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
vcpu->arch.vscr = t->vscr; vcpu->arch.vscr = t->vscr;
} }
...@@ -575,8 +577,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ...@@ -575,8 +577,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
printk(KERN_INFO "Loading up ext 0x%lx\n", msr); printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
#endif #endif
current->thread.regs->msr |= msr;
if (msr & MSR_FP) { if (msr & MSR_FP) {
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
...@@ -598,12 +598,32 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ...@@ -598,12 +598,32 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
#endif #endif
} }
current->thread.regs->msr |= msr;
vcpu->arch.guest_owned_ext |= msr; vcpu->arch.guest_owned_ext |= msr;
kvmppc_recalc_shadow_msr(vcpu); kvmppc_recalc_shadow_msr(vcpu);
return RESUME_GUEST; return RESUME_GUEST;
} }
/*
 * Kernel code using FP or VMX could have flushed guest state to
 * the thread_struct; if so, get it back now.
 *
 * A facility counts as "lost" when the vcpu still records it as guest
 * owned (guest_owned_ext) but the corresponding bit is no longer set in
 * the current thread's MSR.  Each lost facility is reloaded and its MSR
 * bit is set again.
 *
 * The AltiVec reload is guarded by CONFIG_ALTIVEC, matching the guards
 * used around the other AltiVec handling in this file.
 */
static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
{
	unsigned long lost_ext;

	lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
	if (!lost_ext)
		return;

	if (lost_ext & MSR_FP)
		kvmppc_load_up_fpu();
#ifdef CONFIG_ALTIVEC
	if (lost_ext & MSR_VEC)
		kvmppc_load_up_altivec();
#endif
	current->thread.regs->msr |= lost_ext;
}
int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr) unsigned int exit_nr)
{ {
...@@ -772,7 +792,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -772,7 +792,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
} }
case BOOK3S_INTERRUPT_SYSCALL: case BOOK3S_INTERRUPT_SYSCALL:
if (vcpu->arch.papr_enabled && if (vcpu->arch.papr_enabled &&
(kvmppc_get_last_inst(vcpu) == 0x44000022) && (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
!(vcpu->arch.shared->msr & MSR_PR)) { !(vcpu->arch.shared->msr & MSR_PR)) {
/* SC 1 papr hypercalls */ /* SC 1 papr hypercalls */
ulong cmd = kvmppc_get_gpr(vcpu, 3); ulong cmd = kvmppc_get_gpr(vcpu, 3);
...@@ -890,8 +910,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -890,8 +910,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
local_irq_enable(); local_irq_enable();
r = s; r = s;
} else { } else {
kvmppc_lazy_ee_enable(); kvmppc_fix_ee_before_entry();
} }
kvmppc_handle_lost_ext(vcpu);
} }
trace_kvm_book3s_reenter(r, vcpu); trace_kvm_book3s_reenter(r, vcpu);
...@@ -1047,11 +1068,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) ...@@ -1047,11 +1068,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
if (err) if (err)
goto free_shadow_vcpu; goto free_shadow_vcpu;
err = -ENOMEM;
p = __get_free_page(GFP_KERNEL|__GFP_ZERO); p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
/* the real shared page fills the last 4k of our page */
vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
if (!p) if (!p)
goto uninit_vcpu; goto uninit_vcpu;
/* the real shared page fills the last 4k of our page */
vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
#ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_BOOK3S_64
/* default to book3s_64 (970fx) */ /* default to book3s_64 (970fx) */
...@@ -1161,7 +1183,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ...@@ -1161,7 +1183,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_FP) if (vcpu->arch.shared->msr & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
kvmppc_lazy_ee_enable(); kvmppc_fix_ee_before_entry();
ret = __kvmppc_vcpu_run(kvm_run, vcpu); ret = __kvmppc_vcpu_run(kvm_run, vcpu);
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <asm/hvcall.h> #include <asm/hvcall.h>
#include <asm/xics.h> #include <asm/xics.h>
#include <asm/debug.h> #include <asm/debug.h>
#include <asm/time.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
......
...@@ -674,8 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ...@@ -674,8 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
goto out; goto out;
} }
kvm_guest_enter();
#ifdef CONFIG_PPC_FPU #ifdef CONFIG_PPC_FPU
/* Save userspace FPU state in stack */ /* Save userspace FPU state in stack */
enable_kernel_fp(); enable_kernel_fp();
...@@ -698,7 +696,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ...@@ -698,7 +696,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_load_guest_fp(vcpu); kvmppc_load_guest_fp(vcpu);
#endif #endif
kvmppc_lazy_ee_enable(); kvmppc_fix_ee_before_entry();
ret = __kvmppc_vcpu_run(kvm_run, vcpu); ret = __kvmppc_vcpu_run(kvm_run, vcpu);
...@@ -1168,7 +1166,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -1168,7 +1166,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
local_irq_enable(); local_irq_enable();
r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
} else { } else {
kvmppc_lazy_ee_enable(); kvmppc_fix_ee_before_entry();
} }
} }
......
...@@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) ...@@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
kvm_guest_exit(); kvm_guest_exit();
continue; continue;
} }
trace_hardirqs_on();
#endif #endif
kvm_guest_enter(); kvm_guest_enter();
......
...@@ -200,11 +200,9 @@ config DMA_SHARED_BUFFER ...@@ -200,11 +200,9 @@ config DMA_SHARED_BUFFER
APIs extension; the file's descriptor can then be passed on to other APIs extension; the file's descriptor can then be passed on to other
driver. driver.
config CMA config DMA_CMA
bool "Contiguous Memory Allocator" bool "DMA Contiguous Memory Allocator"
depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK depends on HAVE_DMA_CONTIGUOUS && CMA
select MIGRATION
select MEMORY_ISOLATION
help help
This enables the Contiguous Memory Allocator which allows drivers This enables the Contiguous Memory Allocator which allows drivers
to allocate big physically-contiguous blocks of memory for use with to allocate big physically-contiguous blocks of memory for use with
...@@ -213,17 +211,7 @@ config CMA ...@@ -213,17 +211,7 @@ config CMA
For more information see <include/linux/dma-contiguous.h>. For more information see <include/linux/dma-contiguous.h>.
If unsure, say "n". If unsure, say "n".
if CMA if DMA_CMA
config CMA_DEBUG
bool "CMA debug messages (DEVELOPMENT)"
depends on DEBUG_KERNEL
help
Turns on debug messages in CMA. This produces KERN_DEBUG
messages for every CMA call as well as various messages while
processing calls such as dma_alloc_from_contiguous().
This option does not affect warning and error messages.
comment "Default contiguous memory area size:" comment "Default contiguous memory area size:"
config CMA_SIZE_MBYTES config CMA_SIZE_MBYTES
......
...@@ -6,7 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \ ...@@ -6,7 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \
attribute_container.o transport_class.o \ attribute_container.o transport_class.o \
topology.o topology.o
obj-$(CONFIG_DEVTMPFS) += devtmpfs.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o
obj-$(CONFIG_CMA) += dma-contiguous.o obj-$(CONFIG_DMA_CMA) += dma-contiguous.o
obj-y += power/ obj-y += power/
obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAS_DMA) += dma-mapping.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
......
...@@ -57,7 +57,7 @@ struct cma; ...@@ -57,7 +57,7 @@ struct cma;
struct page; struct page;
struct device; struct device;
#ifdef CONFIG_CMA #ifdef CONFIG_DMA_CMA
/* /*
* There is always at least global CMA area and a few optional device * There is always at least global CMA area and a few optional device
......
...@@ -478,6 +478,30 @@ config FRONTSWAP ...@@ -478,6 +478,30 @@ config FRONTSWAP
If unsure, say Y to enable frontswap. If unsure, say Y to enable frontswap.
config CMA
bool "Contiguous Memory Allocator"
depends on HAVE_MEMBLOCK
select MIGRATION
select MEMORY_ISOLATION
help
This enables the Contiguous Memory Allocator which allows other
subsystems to allocate big physically-contiguous blocks of memory.
CMA reserves a region of memory and allows only movable pages to
be allocated from it. This way, the kernel can use the memory for
pagecache and when a subsystem requests for contiguous area, the
allocated pages are migrated away to serve the contiguous request.
If unsure, say "n".
config CMA_DEBUG
bool "CMA debug messages (DEVELOPMENT)"
depends on DEBUG_KERNEL && CMA
help
Turns on debug messages in CMA. This produces KERN_DEBUG
messages for every CMA call as well as various messages while
processing calls such as dma_alloc_from_contiguous().
This option does not affect warning and error messages.
config ZBUD config ZBUD
tristate tristate
default n default n
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册