提交 a9f6cf96 编写于 作者: G Gleb Natapov

Merge branch 'kvm-ppc-next' of git://github.com/agraf/linux-2.6 into queue

* 'kvm-ppc-next' of git://github.com/agraf/linux-2.6:
  KVM: PPC: Book3S PR: Rework kvmppc_mmu_book3s_64_xlate()
  KVM: PPC: Book3S PR: Make instruction fetch fallback work for system calls
  KVM: PPC: Book3S PR: Don't corrupt guest state when kernel uses VMX
  KVM: PPC: Book3S: Fix compile error in XICS emulation
  KVM: PPC: Book3S PR: return appropriate error when allocation fails
  arch: powerpc: kvm: add signed type cast for comparation
  powerpc/kvm: Copy the pvr value after memset
  KVM: PPC: Book3S PR: Load up SPRG3 register with guest value on guest entry
  kvm/ppc/booke: Don't call kvm_guest_enter twice
  kvm/ppc: Call trace_hardirqs_on before entry
  KVM: PPC: Book3S HV: Allow negative offsets to real-mode hcall handlers
  KVM: PPC: Book3S HV: Correct tlbie usage
  powerpc/kvm: Use 256K chunk to track both RMA and hash page table allocation.
  powerpc/kvm: Contiguous memory allocator based RMA allocation
  powerpc/kvm: Contiguous memory allocator based hash page table allocation
  KVM: PPC: Book3S: Ignore DABR register
  mm/cma: Move dma contiguous changes into a seperate config
......@@ -2,7 +2,7 @@
#define ASMARM_DMA_CONTIGUOUS_H
#ifdef __KERNEL__
#ifdef CONFIG_CMA
#ifdef CONFIG_DMA_CMA
#include <linux/types.h>
#include <asm-generic/dma-contiguous.h>
......
......@@ -358,7 +358,7 @@ static int __init atomic_pool_init(void)
if (!pages)
goto no_pages;
if (IS_ENABLED(CONFIG_CMA))
if (IS_ENABLED(CONFIG_DMA_CMA))
ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page,
atomic_pool_init);
else
......@@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
addr = __alloc_simple_buffer(dev, size, gfp, &page);
else if (!(gfp & __GFP_WAIT))
addr = __alloc_from_pool(size, &page);
else if (!IS_ENABLED(CONFIG_CMA))
else if (!IS_ENABLED(CONFIG_DMA_CMA))
addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
else
addr = __alloc_from_contiguous(dev, size, prot, &page, caller);
......@@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
__dma_free_buffer(page, size);
} else if (__free_from_pool(cpu_addr, size)) {
return;
} else if (!IS_ENABLED(CONFIG_CMA)) {
} else if (!IS_ENABLED(CONFIG_DMA_CMA)) {
__dma_free_remap(cpu_addr, size);
__dma_free_buffer(page, size);
} else {
......
......@@ -334,6 +334,27 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
return r;
}
/*
* Like kvmppc_get_last_inst(), but for fetching a sc instruction.
* Because the sc instruction sets SRR0 to point to the following
* instruction, we have to fetch from pc - 4.
*/
static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
{
ulong pc = kvmppc_get_pc(vcpu) - 4;
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
u32 r;
/* Load the instruction manually if it failed to do so in the
* exit path */
if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
r = svcpu->last_inst;
svcpu_put(svcpu);
return r;
}
static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
{
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
......@@ -446,6 +467,23 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
return vcpu->arch.last_inst;
}
/*
* Like kvmppc_get_last_inst(), but for fetching a sc instruction.
* Because the sc instruction sets SRR0 to point to the following
* instruction, we have to fetch from pc - 4.
*/
static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
{
ulong pc = kvmppc_get_pc(vcpu) - 4;
/* Load the instruction manually if it failed to do so in the
* exit path */
if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
return vcpu->arch.last_inst;
}
static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault_dar;
......
......@@ -37,7 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
#ifdef CONFIG_KVM_BOOK3S_64_HV
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
extern int kvm_hpt_order; /* order of preallocated HPTs */
extern unsigned long kvm_rma_pages;
#endif
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
......@@ -100,7 +100,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
/* (masks depend on page size) */
rb |= 0x1000; /* page encoding in LP field */
rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
rb |= ((va_low << 4) & 0xf0); /* AVAL field (P7 doesn't seem to care) */
}
} else {
/* 4kB page */
......
......@@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table {
struct page *pages[0];
};
struct kvmppc_linear_info {
void *base_virt;
unsigned long base_pfn;
unsigned long npages;
struct list_head list;
atomic_t use_count;
int type;
struct kvm_rma_info {
atomic_t use_count;
unsigned long base_pfn;
};
/* XICS components, defined in book3s_xics.c */
......@@ -246,7 +242,7 @@ struct kvm_arch {
int tlbie_lock;
unsigned long lpcr;
unsigned long rmor;
struct kvmppc_linear_info *rma;
struct kvm_rma_info *rma;
unsigned long vrma_slb_v;
int rma_setup_done;
int using_mmu_notifiers;
......@@ -259,7 +255,7 @@ struct kvm_arch {
spinlock_t slot_phys_lock;
cpumask_t need_tlb_flush;
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
struct kvmppc_linear_info *hpt_li;
int hpt_cma_alloc;
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
......
......@@ -137,10 +137,10 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
struct kvm_allocate_rma *rma);
extern struct kvmppc_linear_info *kvm_alloc_rma(void);
extern void kvm_release_rma(struct kvmppc_linear_info *ri);
extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
extern void kvm_release_hpt(struct kvmppc_linear_info *li);
extern struct kvm_rma_info *kvm_alloc_rma(void);
extern void kvm_release_rma(struct kvm_rma_info *ri);
extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
extern int kvmppc_core_init_vm(struct kvm *kvm);
extern void kvmppc_core_destroy_vm(struct kvm *kvm);
extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
......@@ -261,6 +261,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
struct openpic;
#ifdef CONFIG_KVM_BOOK3S_64_HV
extern void kvm_cma_reserve(void) __init;
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{
paca[cpu].kvm_hstate.xics_phys = addr;
......@@ -281,13 +282,12 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
}
extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
extern void kvm_linear_init(void);
#else
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
static inline void __init kvm_cma_reserve(void)
{}
static inline void kvm_linear_init(void)
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{}
static inline u32 kvmppc_get_xics_latch(void)
......@@ -394,10 +394,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
}
}
/* Please call after prepare_to_enter. This function puts the lazy ee state
back to normal mode, without actually enabling interrupts. */
static inline void kvmppc_lazy_ee_enable(void)
/*
* Please call after prepare_to_enter. This function puts the lazy ee and irq
* disabled tracking state back to normal mode, without actually enabling
* interrupts.
*/
static inline void kvmppc_fix_ee_before_entry(void)
{
trace_hardirqs_on();
#ifdef CONFIG_PPC64
/* Only need to enable IRQs by hard enabling them after this */
local_paca->irq_happened = 0;
......
......@@ -451,6 +451,7 @@ int main(void)
DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
#endif
DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6));
......
......@@ -229,6 +229,8 @@ void __init early_setup(unsigned long dt_ptr)
/* Initialize the hash table or TLB handling */
early_init_mmu();
kvm_cma_reserve();
/*
* Reserve any gigantic pages requested on the command line.
* memblock needs to have been initialized by the time this is
......@@ -609,8 +611,6 @@ void __init setup_arch(char **cmdline_p)
/* Initialize the MMU context management stuff */
mmu_context_init();
kvm_linear_init();
/* Interrupt code needs to be 64K-aligned */
if ((unsigned long)_stext & 0xffff)
panic("Kernelbase not 64K-aligned (0x%lx)!\n",
......
......@@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV
bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
depends on KVM_BOOK3S_64
select MMU_NOTIFIER
select CMA
---help---
Support running unmodified book3s_64 guest kernels in
virtual machines on POWER7 and PPC970 processors that have
......
......@@ -81,6 +81,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_64_vio_hv.o \
book3s_hv_ras.o \
book3s_hv_builtin.o \
book3s_hv_cma.o \
$(kvm-book3s_64-builtin-xics-objs-y)
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
......
......@@ -182,10 +182,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
hva_t ptegp;
u64 pteg[16];
u64 avpn = 0;
u64 v, r;
u64 v_val, v_mask;
u64 eaddr_mask;
int i;
u8 key = 0;
u8 pp, key = 0;
bool found = false;
int second = 0;
bool second = false;
ulong mp_ea = vcpu->arch.magic_page_ea;
/* Magic page override */
......@@ -208,8 +211,16 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
goto no_seg_found;
avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
v_val = avpn & HPTE_V_AVPN;
if (slbe->tb)
avpn |= SLB_VSID_B_1T;
v_val |= SLB_VSID_B_1T;
if (slbe->large)
v_val |= HPTE_V_LARGE;
v_val |= HPTE_V_VALID;
v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
HPTE_V_SECONDARY;
do_second:
ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
......@@ -227,91 +238,74 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
key = 4;
for (i=0; i<16; i+=2) {
u64 v = pteg[i];
u64 r = pteg[i+1];
/* Valid check */
if (!(v & HPTE_V_VALID))
continue;
/* Hash check */
if ((v & HPTE_V_SECONDARY) != second)
continue;
/* AVPN compare */
if (HPTE_V_COMPARE(avpn, v)) {
u8 pp = (r & HPTE_R_PP) | key;
int eaddr_mask = 0xFFF;
gpte->eaddr = eaddr;
gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu,
eaddr,
data);
if (slbe->large)
eaddr_mask = 0xFFFFFF;
gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask);
gpte->may_execute = ((r & HPTE_R_N) ? false : true);
gpte->may_read = false;
gpte->may_write = false;
switch (pp) {
case 0:
case 1:
case 2:
case 6:
gpte->may_write = true;
/* fall through */
case 3:
case 5:
case 7:
gpte->may_read = true;
break;
}
dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
"-> 0x%lx\n",
eaddr, avpn, gpte->vpage, gpte->raddr);
/* Check all relevant fields of 1st dword */
if ((pteg[i] & v_mask) == v_val) {
found = true;
break;
}
}
/* Update PTE R and C bits, so the guest's swapper knows we used the
* page */
if (found) {
u32 oldr = pteg[i+1];
if (!found) {
if (second)
goto no_page_found;
v_val |= HPTE_V_SECONDARY;
second = true;
goto do_second;
}
if (gpte->may_read) {
/* Set the accessed flag */
pteg[i+1] |= HPTE_R_R;
}
if (gpte->may_write) {
/* Set the dirty flag */
pteg[i+1] |= HPTE_R_C;
} else {
dprintk("KVM: Mapping read-only page!\n");
}
v = pteg[i];
r = pteg[i+1];
pp = (r & HPTE_R_PP) | key;
eaddr_mask = 0xFFF;
gpte->eaddr = eaddr;
gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
if (slbe->large)
eaddr_mask = 0xFFFFFF;
gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
gpte->may_execute = ((r & HPTE_R_N) ? false : true);
gpte->may_read = false;
gpte->may_write = false;
switch (pp) {
case 0:
case 1:
case 2:
case 6:
gpte->may_write = true;
/* fall through */
case 3:
case 5:
case 7:
gpte->may_read = true;
break;
}
/* Write back into the PTEG */
if (pteg[i+1] != oldr)
copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
"-> 0x%lx\n",
eaddr, avpn, gpte->vpage, gpte->raddr);
if (!gpte->may_read)
return -EPERM;
return 0;
} else {
dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx "
"ptegp=0x%lx)\n",
eaddr, to_book3s(vcpu)->sdr1, ptegp);
for (i = 0; i < 16; i += 2)
dprintk(" %02d: 0x%llx - 0x%llx (0x%llx)\n",
i, pteg[i], pteg[i+1], avpn);
if (!second) {
second = HPTE_V_SECONDARY;
goto do_second;
}
/* Update PTE R and C bits, so the guest's swapper knows we used the
* page */
if (gpte->may_read) {
/* Set the accessed flag */
r |= HPTE_R_R;
}
if (data && gpte->may_write) {
/* Set the dirty flag -- XXX even if not writing */
r |= HPTE_R_C;
}
/* Write back into the PTEG */
if (pteg[i+1] != r) {
pteg[i+1] = r;
copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
}
if (!gpte->may_read)
return -EPERM;
return 0;
no_page_found:
return -ENOENT;
......
......@@ -37,6 +37,8 @@
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
#include "book3s_hv_cma.h"
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63
......@@ -52,8 +54,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{
unsigned long hpt;
struct revmap_entry *rev;
struct kvmppc_linear_info *li;
long order = kvm_hpt_order;
struct page *page = NULL;
long order = KVM_DEFAULT_HPT_ORDER;
if (htab_orderp) {
order = *htab_orderp;
......@@ -61,26 +63,23 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
order = PPC_MIN_HPT_ORDER;
}
kvm->arch.hpt_cma_alloc = 0;
/*
* If the user wants a different size from default,
* try first to allocate it from the kernel page allocator.
* We keep the CMA reserved for failed allocation.
*/
hpt = 0;
if (order != kvm_hpt_order) {
hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
__GFP_NOWARN, order - PAGE_SHIFT);
if (!hpt)
--order;
}
hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT |
__GFP_NOWARN, order - PAGE_SHIFT);
/* Next try to allocate from the preallocated pool */
if (!hpt) {
li = kvm_alloc_hpt();
if (li) {
hpt = (ulong)li->base_virt;
kvm->arch.hpt_li = li;
order = kvm_hpt_order;
}
VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
kvm->arch.hpt_cma_alloc = 1;
} else
--order;
}
/* Lastly try successively smaller sizes from the page allocator */
......@@ -118,8 +117,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
return 0;
out_freehpt:
if (kvm->arch.hpt_li)
kvm_release_hpt(kvm->arch.hpt_li);
if (kvm->arch.hpt_cma_alloc)
kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
else
free_pages(hpt, order - PAGE_SHIFT);
return -ENOMEM;
......@@ -165,8 +164,9 @@ void kvmppc_free_hpt(struct kvm *kvm)
{
kvmppc_free_lpid(kvm->arch.lpid);
vfree(kvm->arch.revmap);
if (kvm->arch.hpt_li)
kvm_release_hpt(kvm->arch.hpt_li);
if (kvm->arch.hpt_cma_alloc)
kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
1 << (kvm->arch.hpt_order - PAGE_SHIFT));
else
free_pages(kvm->arch.hpt_virt,
kvm->arch.hpt_order - PAGE_SHIFT);
......
......@@ -458,6 +458,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_PMC4_GEKKO:
case SPRN_WPAR_GEKKO:
case SPRN_MSSSR0:
case SPRN_DABR:
break;
unprivileged:
default:
......@@ -555,6 +556,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_PMC4_GEKKO:
case SPRN_WPAR_GEKKO:
case SPRN_MSSSR0:
case SPRN_DABR:
*spr_val = 0;
break;
default:
......
......@@ -680,13 +680,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
struct kvm_sregs *sregs)
{
int i;
sregs->pvr = vcpu->arch.pvr;
memset(sregs, 0, sizeof(struct kvm_sregs));
sregs->pvr = vcpu->arch.pvr;
for (i = 0; i < vcpu->arch.slb_max; i++) {
sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
......@@ -696,7 +695,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
struct kvm_sregs *sregs)
{
int i, j;
......@@ -1511,10 +1510,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct kvmppc_linear_info *ri = vma->vm_file->private_data;
struct page *page;
struct kvm_rma_info *ri = vma->vm_file->private_data;
if (vmf->pgoff >= ri->npages)
if (vmf->pgoff >= kvm_rma_pages)
return VM_FAULT_SIGBUS;
page = pfn_to_page(ri->base_pfn + vmf->pgoff);
......@@ -1536,7 +1535,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
static int kvm_rma_release(struct inode *inode, struct file *filp)
{
struct kvmppc_linear_info *ri = filp->private_data;
struct kvm_rma_info *ri = filp->private_data;
kvm_release_rma(ri);
return 0;
......@@ -1549,8 +1548,17 @@ static const struct file_operations kvm_rma_fops = {
long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
{
struct kvmppc_linear_info *ri;
long fd;
struct kvm_rma_info *ri;
/*
* Only do this on PPC970 in HV mode
*/
if (!cpu_has_feature(CPU_FTR_HVMODE) ||
!cpu_has_feature(CPU_FTR_ARCH_201))
return -EINVAL;
if (!kvm_rma_pages)
return -EINVAL;
ri = kvm_alloc_rma();
if (!ri)
......@@ -1560,7 +1568,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
if (fd < 0)
kvm_release_rma(ri);
ret->rma_size = ri->npages << PAGE_SHIFT;
ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
return fd;
}
......@@ -1725,7 +1733,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
int err = 0;
struct kvm *kvm = vcpu->kvm;
struct kvmppc_linear_info *ri = NULL;
struct kvm_rma_info *ri = NULL;
unsigned long hva;
struct kvm_memory_slot *memslot;
struct vm_area_struct *vma;
......@@ -1803,13 +1811,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
} else {
/* Set up to use an RMO region */
rma_size = ri->npages;
rma_size = kvm_rma_pages;
if (rma_size > memslot->npages)
rma_size = memslot->npages;
rma_size <<= PAGE_SHIFT;
rmls = lpcr_rmls(rma_size);
err = -EINVAL;
if (rmls < 0) {
if ((long)rmls < 0) {
pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
goto out_srcu;
}
......@@ -1831,14 +1839,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
/* POWER7 */
lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
lpcr |= rmls << LPCR_RMLS_SH;
kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
}
kvm->arch.lpcr = lpcr;
pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
/* Initialize phys addrs of pages in RMO */
npages = ri->npages;
npages = kvm_rma_pages;
porder = __ilog2(npages);
physp = memslot->arch.slot_phys;
if (physp) {
......@@ -1874,7 +1882,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
/* Allocate the guest's logical partition ID */
lpid = kvmppc_alloc_lpid();
if (lpid < 0)
if ((long)lpid < 0)
return -ENOMEM;
kvm->arch.lpid = lpid;
......
......@@ -13,33 +13,34 @@
#include <linux/spinlock.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/sizes.h>
#include <asm/cputable.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#define KVM_LINEAR_RMA 0
#define KVM_LINEAR_HPT 1
static void __init kvm_linear_init_one(ulong size, int count, int type);
static struct kvmppc_linear_info *kvm_alloc_linear(int type);
static void kvm_release_linear(struct kvmppc_linear_info *ri);
int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
EXPORT_SYMBOL_GPL(kvm_hpt_order);
/*************** RMA *************/
#include "book3s_hv_cma.h"
/*
* Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
* should be power of 2.
*/
#define HPT_ALIGN_PAGES ((1 << 18) >> PAGE_SHIFT) /* 256k */
/*
* By default we reserve 5% of memory for hash pagetable allocation.
*/
static unsigned long kvm_cma_resv_ratio = 5;
/*
* This maintains a list of RMAs (real mode areas) for KVM guests to use.
* We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
* Each RMA has to be physically contiguous and of a size that the
* hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
* and other larger sizes. Since we are unlikely to be allocate that
* much physically contiguous memory after the system is up and running,
* we preallocate a set of RMAs in early boot for KVM to use.
* we preallocate a set of RMAs in early boot using CMA.
* should be power of 2.
*/
static unsigned long kvm_rma_size = 64 << 20; /* 64MB */
static unsigned long kvm_rma_count;
unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
EXPORT_SYMBOL_GPL(kvm_rma_pages);
/* Work out RMLS (real mode limit selector) field value for a given RMA size.
Assumes POWER7 or PPC970. */
......@@ -69,165 +70,114 @@ static inline int lpcr_rmls(unsigned long rma_size)
static int __init early_parse_rma_size(char *p)
{
if (!p)
return 1;
unsigned long kvm_rma_size;
pr_debug("%s(%s)\n", __func__, p);
if (!p)
return -EINVAL;
kvm_rma_size = memparse(p, &p);
/*
* Check that the requested size is one supported in hardware
*/
if (lpcr_rmls(kvm_rma_size) < 0) {
pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
return -EINVAL;
}
kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
return 0;
}
early_param("kvm_rma_size", early_parse_rma_size);
static int __init early_parse_rma_count(char *p)
struct kvm_rma_info *kvm_alloc_rma()
{
if (!p)
return 1;
kvm_rma_count = simple_strtoul(p, NULL, 0);
return 0;
}
early_param("kvm_rma_count", early_parse_rma_count);
struct kvmppc_linear_info *kvm_alloc_rma(void)
{
return kvm_alloc_linear(KVM_LINEAR_RMA);
struct page *page;
struct kvm_rma_info *ri;
ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
if (!ri)
return NULL;
page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
if (!page)
goto err_out;
atomic_set(&ri->use_count, 1);
ri->base_pfn = page_to_pfn(page);
return ri;
err_out:
kfree(ri);
return NULL;
}
EXPORT_SYMBOL_GPL(kvm_alloc_rma);
void kvm_release_rma(struct kvmppc_linear_info *ri)
void kvm_release_rma(struct kvm_rma_info *ri)
{
kvm_release_linear(ri);
if (atomic_dec_and_test(&ri->use_count)) {
kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
kfree(ri);
}
}
EXPORT_SYMBOL_GPL(kvm_release_rma);
/*************** HPT *************/
/*
* This maintains a list of big linear HPT tables that contain the GVA->HPA
* memory mappings. If we don't reserve those early on, we might not be able
* to get a big (usually 16MB) linear memory region from the kernel anymore.
*/
static unsigned long kvm_hpt_count;
static int __init early_parse_hpt_count(char *p)
static int __init early_parse_kvm_cma_resv(char *p)
{
pr_debug("%s(%s)\n", __func__, p);
if (!p)
return 1;
kvm_hpt_count = simple_strtoul(p, NULL, 0);
return 0;
return -EINVAL;
return kstrtoul(p, 0, &kvm_cma_resv_ratio);
}
early_param("kvm_hpt_count", early_parse_hpt_count);
early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
struct kvmppc_linear_info *kvm_alloc_hpt(void)
struct page *kvm_alloc_hpt(unsigned long nr_pages)
{
return kvm_alloc_linear(KVM_LINEAR_HPT);
unsigned long align_pages = HPT_ALIGN_PAGES;
/* Old CPUs require HPT aligned on a multiple of its size */
if (!cpu_has_feature(CPU_FTR_ARCH_206))
align_pages = nr_pages;
return kvm_alloc_cma(nr_pages, align_pages);
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
void kvm_release_hpt(struct kvmppc_linear_info *li)
void kvm_release_hpt(struct page *page, unsigned long nr_pages)
{
kvm_release_linear(li);
kvm_release_cma(page, nr_pages);
}
EXPORT_SYMBOL_GPL(kvm_release_hpt);
/*************** generic *************/
static LIST_HEAD(free_linears);
static DEFINE_SPINLOCK(linear_lock);
static void __init kvm_linear_init_one(ulong size, int count, int type)
{
unsigned long i;
unsigned long j, npages;
void *linear;
struct page *pg;
const char *typestr;
struct kvmppc_linear_info *linear_info;
if (!count)
return;
typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
npages = size >> PAGE_SHIFT;
linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
for (i = 0; i < count; ++i) {
linear = alloc_bootmem_align(size, size);
pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
size >> 20);
linear_info[i].base_virt = linear;
linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
linear_info[i].npages = npages;
linear_info[i].type = type;
list_add_tail(&linear_info[i].list, &free_linears);
atomic_set(&linear_info[i].use_count, 0);
pg = pfn_to_page(linear_info[i].base_pfn);
for (j = 0; j < npages; ++j) {
atomic_inc(&pg->_count);
++pg;
}
}
}
static struct kvmppc_linear_info *kvm_alloc_linear(int type)
{
struct kvmppc_linear_info *ri, *ret;
ret = NULL;
spin_lock(&linear_lock);
list_for_each_entry(ri, &free_linears, list) {
if (ri->type != type)
continue;
list_del(&ri->list);
atomic_inc(&ri->use_count);
memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
ret = ri;
break;
}
spin_unlock(&linear_lock);
return ret;
}
static void kvm_release_linear(struct kvmppc_linear_info *ri)
{
if (atomic_dec_and_test(&ri->use_count)) {
spin_lock(&linear_lock);
list_add_tail(&ri->list, &free_linears);
spin_unlock(&linear_lock);
}
}
/*
* Called at boot time while the bootmem allocator is active,
* to allocate contiguous physical memory for the hash page
* tables for guests.
/**
* kvm_cma_reserve() - reserve area for kvm hash pagetable
*
* This function reserves memory from early allocator. It should be
* called by arch specific code once the early allocator (memblock or bootmem)
* has been activated and all other subsystems have already allocated/reserved
* memory.
*/
void __init kvm_linear_init(void)
void __init kvm_cma_reserve(void)
{
/* HPT */
kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
/* RMA */
/* Only do this on PPC970 in HV mode */
if (!cpu_has_feature(CPU_FTR_HVMODE) ||
!cpu_has_feature(CPU_FTR_ARCH_201))
return;
if (!kvm_rma_size || !kvm_rma_count)
return;
/* Check that the requested size is one supported in hardware */
if (lpcr_rmls(kvm_rma_size) < 0) {
pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
return;
unsigned long align_size;
struct memblock_region *reg;
phys_addr_t selected_size = 0;
/*
* We cannot use memblock_phys_mem_size() here, because
* memblock_analyze() has not been called yet.
*/
for_each_memblock(memory, reg)
selected_size += memblock_region_memory_end_pfn(reg) -
memblock_region_memory_base_pfn(reg);
selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
if (selected_size) {
pr_debug("%s: reserving %ld MiB for global area\n", __func__,
(unsigned long)selected_size / SZ_1M);
/*
* Old CPUs require HPT aligned on a multiple of its size. So for them
* make the alignment as max size we could request.
*/
if (!cpu_has_feature(CPU_FTR_ARCH_206))
align_size = __rounddown_pow_of_two(selected_size);
else
align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
kvm_cma_declare_contiguous(selected_size, align_size);
}
kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
}
/*
* Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
* for DMA mapping framework
*
* Copyright IBM Corporation, 2013
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License or (at your optional) any later version of the license.
*
*/
#define pr_fmt(fmt) "kvm_cma: " fmt
#ifdef CONFIG_CMA_DEBUG
#ifndef DEBUG
# define DEBUG
#endif
#endif
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include "book3s_hv_cma.h"
struct kvm_cma {
unsigned long base_pfn;
unsigned long count;
unsigned long *bitmap;
};
static DEFINE_MUTEX(kvm_cma_mutex);
static struct kvm_cma kvm_cma_area;
/**
* kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
* for kvm hash pagetable
* @size: Size of the reserved memory.
* @alignment: Alignment for the contiguous memory area
*
* This function reserves memory for kvm cma area. It should be
* called by arch code when early allocator (memblock or bootmem)
* is still activate.
*/
long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
{
long base_pfn;
phys_addr_t addr;
struct kvm_cma *cma = &kvm_cma_area;
pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
if (!size)
return -EINVAL;
/*
* Sanitise input arguments.
* We should be pageblock aligned for CMA.
*/
alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
size = ALIGN(size, alignment);
/*
* Reserve memory
* Use __memblock_alloc_base() since
* memblock_alloc_base() panic()s.
*/
addr = __memblock_alloc_base(size, alignment, 0);
if (!addr) {
base_pfn = -ENOMEM;
goto err;
} else
base_pfn = PFN_DOWN(addr);
/*
* Each reserved area must be initialised later, when more kernel
* subsystems (like slab allocator) are available.
*/
cma->base_pfn = base_pfn;
cma->count = size >> PAGE_SHIFT;
pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
return 0;
err:
pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
return base_pfn;
}
/**
* kvm_alloc_cma() - allocate pages from contiguous area
* @nr_pages: Requested number of pages.
* @align_pages: Requested alignment in number of pages
*
* This function allocates memory buffer for hash pagetable.
*/
struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
{
int ret;
struct page *page = NULL;
struct kvm_cma *cma = &kvm_cma_area;
unsigned long chunk_count, nr_chunk;
unsigned long mask, pfn, pageno, start = 0;
if (!cma || !cma->count)
return NULL;
pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
(void *)cma, nr_pages, align_pages);
if (!nr_pages)
return NULL;
/*
* align mask with chunk size. The bit tracks pages in chunk size
*/
VM_BUG_ON(!is_power_of_2(align_pages));
mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
mutex_lock(&kvm_cma_mutex);
for (;;) {
pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
start, nr_chunk, mask);
if (pageno >= chunk_count)
break;
pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
if (ret == 0) {
bitmap_set(cma->bitmap, pageno, nr_chunk);
page = pfn_to_page(pfn);
memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
break;
} else if (ret != -EBUSY) {
break;
}
pr_debug("%s(): memory range at %p is busy, retrying\n",
__func__, pfn_to_page(pfn));
/* try again with a bit different memory target */
start = pageno + mask + 1;
}
mutex_unlock(&kvm_cma_mutex);
pr_debug("%s(): returned %p\n", __func__, page);
return page;
}
/**
* kvm_release_cma() - release allocated pages for hash pagetable
* @pages: Allocated pages.
* @nr_pages: Number of allocated pages.
*
* This function releases memory allocated by kvm_alloc_cma().
* It returns false when provided pages do not belong to contiguous area and
* true otherwise.
*/
bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
{
unsigned long pfn;
unsigned long nr_chunk;
struct kvm_cma *cma = &kvm_cma_area;
if (!cma || !pages)
return false;
pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
pfn = page_to_pfn(pages);
if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
return false;
VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
mutex_lock(&kvm_cma_mutex);
bitmap_clear(cma->bitmap,
(pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
nr_chunk);
free_contig_range(pfn, nr_pages);
mutex_unlock(&kvm_cma_mutex);
return true;
}
static int __init kvm_cma_activate_area(unsigned long base_pfn,
unsigned long count)
{
unsigned long pfn = base_pfn;
unsigned i = count >> pageblock_order;
struct zone *zone;
WARN_ON_ONCE(!pfn_valid(pfn));
zone = page_zone(pfn_to_page(pfn));
do {
unsigned j;
base_pfn = pfn;
for (j = pageblock_nr_pages; j; --j, pfn++) {
WARN_ON_ONCE(!pfn_valid(pfn));
/*
* alloc_contig_range requires the pfn range
* specified to be in the same zone. Make this
* simple by forcing the entire CMA resv range
* to be in the same zone.
*/
if (page_zone(pfn_to_page(pfn)) != zone)
return -EINVAL;
}
init_cma_reserved_pageblock(pfn_to_page(base_pfn));
} while (--i);
return 0;
}
static int __init kvm_cma_init_reserved_areas(void)
{
int bitmap_size, ret;
unsigned long chunk_count;
struct kvm_cma *cma = &kvm_cma_area;
pr_debug("%s()\n", __func__);
if (!cma->count)
return 0;
chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
if (!cma->bitmap)
return -ENOMEM;
ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
if (ret)
goto error;
return 0;
error:
kfree(cma->bitmap);
return ret;
}
core_initcall(kvm_cma_init_reserved_areas);
/*
* Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
* for DMA mapping framework
*
* Copyright IBM Corporation, 2013
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License or (at your optional) any later version of the license.
*
*/
#ifndef __POWERPC_KVM_CMA_ALLOC_H__
#define __POWERPC_KVM_CMA_ALLOC_H__
/*
* Both RMA and Hash page allocation will be multiple of 256K.
*/
#define KVM_CMA_CHUNK_ORDER 18
extern struct page *kvm_alloc_cma(unsigned long nr_pages,
unsigned long align_pages);
extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
extern long kvm_cma_declare_contiguous(phys_addr_t size,
phys_addr_t alignment) __init;
#endif
......@@ -383,6 +383,80 @@ static inline int try_lock_tlbie(unsigned int *lock)
return old == 0;
}
/*
* tlbie/tlbiel is a bit different on the PPC970 compared to later
* processors such as POWER7; the large page bit is in the instruction
* not RB, and the top 16 bits and the bottom 12 bits of the VA
* in RB must be 0.
*/
static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
long npages, int global, bool need_sync)
{
long i;
if (global) {
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
if (need_sync)
asm volatile("ptesync" : : : "memory");
for (i = 0; i < npages; ++i) {
unsigned long rb = rbvalues[i];
if (rb & 1) /* large page */
asm volatile("tlbie %0,1" : :
"r" (rb & 0x0000fffffffff000ul));
else
asm volatile("tlbie %0,0" : :
"r" (rb & 0x0000fffffffff000ul));
}
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} else {
if (need_sync)
asm volatile("ptesync" : : : "memory");
for (i = 0; i < npages; ++i) {
unsigned long rb = rbvalues[i];
if (rb & 1) /* large page */
asm volatile("tlbiel %0,1" : :
"r" (rb & 0x0000fffffffff000ul));
else
asm volatile("tlbiel %0,0" : :
"r" (rb & 0x0000fffffffff000ul));
}
asm volatile("ptesync" : : : "memory");
}
}
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
long npages, int global, bool need_sync)
{
long i;
if (cpu_has_feature(CPU_FTR_ARCH_201)) {
/* PPC970 tlbie instruction is a bit different */
do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
return;
}
if (global) {
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
if (need_sync)
asm volatile("ptesync" : : : "memory");
for (i = 0; i < npages; ++i)
asm volatile(PPC_TLBIE(%1,%0) : :
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} else {
if (need_sync)
asm volatile("ptesync" : : : "memory");
for (i = 0; i < npages; ++i)
asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
asm volatile("ptesync" : : : "memory");
}
}
long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
unsigned long pte_index, unsigned long avpn,
unsigned long *hpret)
......@@ -408,19 +482,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
if (v & HPTE_V_VALID) {
hpte[0] &= ~HPTE_V_VALID;
rb = compute_tlbie_rb(v, hpte[1], pte_index);
if (global_invalidates(kvm, flags)) {
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
: : "r" (rb), "r" (kvm->arch.lpid));
asm volatile("ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} else {
asm volatile("ptesync" : : : "memory");
asm volatile("tlbiel %0" : : "r" (rb));
asm volatile("ptesync" : : : "memory");
}
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
/* Read PTE low word after tlbie to get final R/C values */
remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
}
......@@ -448,12 +510,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
unsigned long *hp, *hptes[4], tlbrb[4];
long int i, j, k, n, found, indexes[4];
unsigned long flags, req, pte_index, rcbits;
long int local = 0;
int global;
long int ret = H_SUCCESS;
struct revmap_entry *rev, *revs[4];
if (atomic_read(&kvm->online_vcpus) == 1)
local = 1;
global = global_invalidates(kvm, 0);
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
n = 0;
for (; i < 4; ++i) {
......@@ -529,22 +590,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
break;
/* Now that we've collected a batch, do the tlbies */
if (!local) {
while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
for (k = 0; k < n; ++k)
asm volatile(PPC_TLBIE(%1,%0) : :
"r" (tlbrb[k]),
"r" (kvm->arch.lpid));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} else {
asm volatile("ptesync" : : : "memory");
for (k = 0; k < n; ++k)
asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
asm volatile("ptesync" : : : "memory");
}
do_tlbies(kvm, tlbrb, n, global, true);
/* Read PTE low words after tlbie to get final R/C values */
for (k = 0; k < n; ++k) {
......@@ -603,19 +649,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
if (v & HPTE_V_VALID) {
rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = v & ~HPTE_V_VALID;
if (global_invalidates(kvm, flags)) {
while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
: : "r" (rb), "r" (kvm->arch.lpid));
asm volatile("ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} else {
asm volatile("ptesync" : : : "memory");
asm volatile("tlbiel %0" : : "r" (rb));
asm volatile("ptesync" : : : "memory");
}
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
/*
* If the host has this page as readonly but the guest
* wants to make it read/write, reduce the permissions.
......@@ -686,13 +720,7 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
hptep[0] &= ~HPTE_V_VALID;
rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
: : "r" (rb), "r" (kvm->arch.lpid));
asm volatile("ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
do_tlbies(kvm, &rb, 1, 1, true);
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
......@@ -706,12 +734,7 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
/* modify only the second-last byte, which contains the ref bit */
*((char *)hptep + 14) = rbyte;
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
: : "r" (rb), "r" (kvm->arch.lpid));
asm volatile("ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
do_tlbies(kvm, &rb, 1, 1, false);
}
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
......
......@@ -1381,7 +1381,7 @@ hcall_try_real_mode:
cmpldi r3,hcall_real_table_end - hcall_real_table
bge guest_exit_cont
LOAD_REG_ADDR(r4, hcall_real_table)
lwzx r3,r3,r4
lwax r3,r3,r4
cmpwi r3,0
beq guest_exit_cont
add r3,r3,r4
......
......@@ -92,6 +92,11 @@ kvm_start_lightweight:
PPC_LL r3, VCPU_HFLAGS(r4)
rldicl r3, r3, 0, 63 /* r3 &= 1 */
stb r3, HSTATE_RESTORE_HID5(r13)
/* Load up guest SPRG3 value, since it's user readable */
ld r3, VCPU_SHARED(r4)
ld r3, VCPU_SHARED_SPRG3(r3)
mtspr SPRN_SPRG3, r3
#endif /* CONFIG_PPC_BOOK3S_64 */
PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */
......@@ -123,6 +128,15 @@ kvmppc_handler_highmem:
/* R7 = vcpu */
PPC_LL r7, GPR4(r1)
#ifdef CONFIG_PPC_BOOK3S_64
/*
* Reload kernel SPRG3 value.
* No need to save guest value as usermode can't modify SPRG3.
*/
ld r3, PACA_SPRG3(r13)
mtspr SPRN_SPRG3, r3
#endif /* CONFIG_PPC_BOOK3S_64 */
PPC_STL r14, VCPU_GPR(R14)(r7)
PPC_STL r15, VCPU_GPR(R15)(r7)
PPC_STL r16, VCPU_GPR(R16)(r7)
......
......@@ -468,7 +468,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
* both the traditional FP registers and the added VSX
* registers into thread.fpr[].
*/
giveup_fpu(current);
if (current->thread.regs->msr & MSR_FP)
giveup_fpu(current);
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
......@@ -483,7 +484,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
#ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) {
giveup_altivec(current);
if (current->thread.regs->msr & MSR_VEC)
giveup_altivec(current);
memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
vcpu->arch.vscr = t->vscr;
}
......@@ -575,8 +577,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
#endif
current->thread.regs->msr |= msr;
if (msr & MSR_FP) {
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
......@@ -598,12 +598,32 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
#endif
}
current->thread.regs->msr |= msr;
vcpu->arch.guest_owned_ext |= msr;
kvmppc_recalc_shadow_msr(vcpu);
return RESUME_GUEST;
}
/*
* Kernel code using FP or VMX could have flushed guest state to
* the thread_struct; if so, get it back now.
*/
static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
{
unsigned long lost_ext;
lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
if (!lost_ext)
return;
if (lost_ext & MSR_FP)
kvmppc_load_up_fpu();
if (lost_ext & MSR_VEC)
kvmppc_load_up_altivec();
current->thread.regs->msr |= lost_ext;
}
int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
......@@ -772,7 +792,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
case BOOK3S_INTERRUPT_SYSCALL:
if (vcpu->arch.papr_enabled &&
(kvmppc_get_last_inst(vcpu) == 0x44000022) &&
(kvmppc_get_last_sc(vcpu) == 0x44000022) &&
!(vcpu->arch.shared->msr & MSR_PR)) {
/* SC 1 papr hypercalls */
ulong cmd = kvmppc_get_gpr(vcpu, 3);
......@@ -890,8 +910,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
local_irq_enable();
r = s;
} else {
kvmppc_lazy_ee_enable();
kvmppc_fix_ee_before_entry();
}
kvmppc_handle_lost_ext(vcpu);
}
trace_kvm_book3s_reenter(r, vcpu);
......@@ -1047,11 +1068,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
if (err)
goto free_shadow_vcpu;
err = -ENOMEM;
p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
/* the real shared page fills the last 4k of our page */
vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
if (!p)
goto uninit_vcpu;
/* the real shared page fills the last 4k of our page */
vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
#ifdef CONFIG_PPC_BOOK3S_64
/* default to book3s_64 (970fx) */
......@@ -1161,7 +1183,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
kvmppc_lazy_ee_enable();
kvmppc_fix_ee_before_entry();
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
......
......@@ -19,6 +19,7 @@
#include <asm/hvcall.h>
#include <asm/xics.h>
#include <asm/debug.h>
#include <asm/time.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
......
......@@ -674,8 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
goto out;
}
kvm_guest_enter();
#ifdef CONFIG_PPC_FPU
/* Save userspace FPU state in stack */
enable_kernel_fp();
......@@ -698,7 +696,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_load_guest_fp(vcpu);
#endif
kvmppc_lazy_ee_enable();
kvmppc_fix_ee_before_entry();
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
......@@ -1168,7 +1166,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
local_irq_enable();
r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
} else {
kvmppc_lazy_ee_enable();
kvmppc_fix_ee_before_entry();
}
}
......
......@@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
kvm_guest_exit();
continue;
}
trace_hardirqs_on();
#endif
kvm_guest_enter();
......
......@@ -200,11 +200,9 @@ config DMA_SHARED_BUFFER
APIs extension; the file's descriptor can then be passed on to other
driver.
config CMA
bool "Contiguous Memory Allocator"
depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK
select MIGRATION
select MEMORY_ISOLATION
config DMA_CMA
bool "DMA Contiguous Memory Allocator"
depends on HAVE_DMA_CONTIGUOUS && CMA
help
This enables the Contiguous Memory Allocator which allows drivers
to allocate big physically-contiguous blocks of memory for use with
......@@ -213,17 +211,7 @@ config CMA
For more information see <include/linux/dma-contiguous.h>.
If unsure, say "n".
if CMA
config CMA_DEBUG
bool "CMA debug messages (DEVELOPMENT)"
depends on DEBUG_KERNEL
help
Turns on debug messages in CMA. This produces KERN_DEBUG
messages for every CMA call as well as various messages while
processing calls such as dma_alloc_from_contiguous().
This option does not affect warning and error messages.
if DMA_CMA
comment "Default contiguous memory area size:"
config CMA_SIZE_MBYTES
......
......@@ -6,7 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \
attribute_container.o transport_class.o \
topology.o
obj-$(CONFIG_DEVTMPFS) += devtmpfs.o
obj-$(CONFIG_CMA) += dma-contiguous.o
obj-$(CONFIG_DMA_CMA) += dma-contiguous.o
obj-y += power/
obj-$(CONFIG_HAS_DMA) += dma-mapping.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
......
......@@ -57,7 +57,7 @@ struct cma;
struct page;
struct device;
#ifdef CONFIG_CMA
#ifdef CONFIG_DMA_CMA
/*
* There is always at least global CMA area and a few optional device
......
......@@ -478,6 +478,30 @@ config FRONTSWAP
If unsure, say Y to enable frontswap.
config CMA
bool "Contiguous Memory Allocator"
depends on HAVE_MEMBLOCK
select MIGRATION
select MEMORY_ISOLATION
help
This enables the Contiguous Memory Allocator which allows other
subsystems to allocate big physically-contiguous blocks of memory.
CMA reserves a region of memory and allows only movable pages to
be allocated from it. This way, the kernel can use the memory for
pagecache and when a subsystem requests for contiguous area, the
allocated pages are migrated away to serve the contiguous request.
If unsure, say "n".
config CMA_DEBUG
bool "CMA debug messages (DEVELOPMENT)"
depends on DEBUG_KERNEL && CMA
help
Turns on debug messages in CMA. This produces KERN_DEBUG
messages for every CMA call as well as various messages while
processing calls such as dma_alloc_from_contiguous().
This option does not affect warning and error messages.
config ZBUD
tristate
default n
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册