book3s_64_mmu_hv.c 9.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
26
#include <linux/vmalloc.h>
27 28 29 30 31 32 33 34 35 36 37 38

#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu-hash64.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>

#define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */

39 40
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970	63
41 42 43 44 45 46 47
#define NR_LPIDS	(LPID_RSVD + 1)
unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)];

long kvmppc_alloc_hpt(struct kvm *kvm)
{
	unsigned long hpt;
	unsigned long lpid;
48
	struct revmap_entry *rev;
49

50
	/* Allocate guest's hashed page table */
51 52 53 54 55 56 57 58
	hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN,
			       HPT_ORDER - PAGE_SHIFT);
	if (!hpt) {
		pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
		return -ENOMEM;
	}
	kvm->arch.hpt_virt = hpt;

59 60 61 62 63 64 65 66 67
	/* Allocate reverse map array */
	rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
	if (!rev) {
		pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
		goto out_freehpt;
	}
	kvm->arch.revmap = rev;

	/* Allocate the guest's logical partition ID */
68 69 70 71
	do {
		lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
		if (lpid >= NR_LPIDS) {
			pr_err("kvm_alloc_hpt: No LPIDs free\n");
72
			goto out_freeboth;
73 74 75 76 77 78 79 80
		}
	} while (test_and_set_bit(lpid, lpid_inuse));

	kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
	kvm->arch.lpid = lpid;

	pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
	return 0;
81 82 83 84 85 86

 out_freeboth:
	vfree(rev);
 out_freehpt:
	free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
	return -ENOMEM;
87 88 89 90 91
}

void kvmppc_free_hpt(struct kvm *kvm)
{
	clear_bit(kvm->arch.lpid, lpid_inuse);
92
	vfree(kvm->arch.revmap);
93 94 95
	free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
}

96 97 98 99 100 101 102 103 104 105 106 107 108 109
/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
{
	return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
}

/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */
static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
{
	return (pgsize == 0x10000) ? 0x1000 : 0;
}

void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
		     unsigned long porder)
110 111
{
	unsigned long i;
112
	unsigned long npages;
113 114
	unsigned long hp_v, hp_r;
	unsigned long addr, hash;
115 116
	unsigned long psize;
	unsigned long hp0, hp1;
117
	long ret;
118

119 120
	psize = 1ul << porder;
	npages = memslot->npages >> (porder - PAGE_SHIFT);
121 122

	/* VRMA can't be > 1TB */
123 124
	if (npages > 1ul << (40 - porder))
		npages = 1ul << (40 - porder);
125 126 127 128
	/* Can't use more than 1 HPTE per HPTEG */
	if (npages > HPT_NPTEG)
		npages = HPT_NPTEG;

129 130 131 132 133
	hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
		HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
	hp1 = hpte1_pgsize_encoding(psize) |
		HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;

134
	for (i = 0; i < npages; ++i) {
135
		addr = i << porder;
136 137 138 139 140 141 142 143
		/* can't use hpt_hash since va > 64 bits */
		hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
		/*
		 * We assume that the hash table is empty and no
		 * vcpus are using it at this stage.  Since we create
		 * at most one HPTE per HPTEG, we just assume entry 7
		 * is available and use it.
		 */
144
		hash = (hash << 3) + 7;
145 146
		hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
		hp_r = hp1 | addr;
147 148 149 150 151 152
		ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
		if (ret != H_SUCCESS) {
			pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
			       addr, ret);
			break;
		}
153 154 155 156 157
	}
}

int kvmppc_mmu_hv_init(void)
{
158 159 160
	unsigned long host_lpid, rsvd_lpid;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
161
		return -EINVAL;
162

163
	memset(lpid_inuse, 0, sizeof(lpid_inuse));
164 165 166 167 168 169 170 171 172 173 174 175

	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
		host_lpid = mfspr(SPRN_LPID);	/* POWER7 */
		rsvd_lpid = LPID_RSVD;
	} else {
		host_lpid = 0;			/* PPC970 */
		rsvd_lpid = MAX_LPID_970;
	}

	set_bit(host_lpid, lpid_inuse);
	/* rsvd_lpid is reserved for use in partition switching */
	set_bit(rsvd_lpid, lpid_inuse);
176 177 178 179 180 181 182 183 184 185 186 187 188

	return 0;
}

void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
{
}

static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
{
	kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
}

189 190 191 192 193
/*
 * This is called to get a reference to a guest page if there isn't
 * one already in the kvm->arch.slot_phys[][] arrays.
 */
static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
194 195
				  struct kvm_memory_slot *memslot,
				  unsigned long psize)
196 197
{
	unsigned long start;
198 199 200
	long np, err;
	struct page *page, *hpage, *pages[1];
	unsigned long s, pgsize;
201
	unsigned long *physp;
202 203
	unsigned int is_io, got, pgorder;
	struct vm_area_struct *vma;
204
	unsigned long pfn, i, npages;
205 206 207 208

	physp = kvm->arch.slot_phys[memslot->id];
	if (!physp)
		return -EINVAL;
209
	if (physp[gfn - memslot->base_gfn])
210 211
		return 0;

212 213
	is_io = 0;
	got = 0;
214
	page = NULL;
215
	pgsize = psize;
216
	err = -EINVAL;
217 218 219 220
	start = gfn_to_hva_memslot(memslot, gfn);

	/* Instantiate and get the page we want access to */
	np = get_user_pages_fast(start, 1, 1, pages);
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
	if (np != 1) {
		/* Look up the vma for the page */
		down_read(&current->mm->mmap_sem);
		vma = find_vma(current->mm, start);
		if (!vma || vma->vm_start > start ||
		    start + psize > vma->vm_end ||
		    !(vma->vm_flags & VM_PFNMAP))
			goto up_err;
		is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
		pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
		/* check alignment of pfn vs. requested page size */
		if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
			goto up_err;
		up_read(&current->mm->mmap_sem);

	} else {
		page = pages[0];
		got = KVMPPC_GOT_PAGE;

		/* See if this is a large page */
		s = PAGE_SIZE;
		if (PageHuge(page)) {
			hpage = compound_head(page);
			s <<= compound_order(hpage);
			/* Get the whole large page if slot alignment is ok */
			if (s > psize && slot_is_aligned(memslot, s) &&
			    !(memslot->userspace_addr & (s - 1))) {
				start &= ~(s - 1);
				pgsize = s;
				page = hpage;
			}
252
		}
253 254 255
		if (s < psize)
			goto out;
		pfn = page_to_pfn(page);
256 257
	}

258 259 260
	npages = pgsize >> PAGE_SHIFT;
	pgorder = __ilog2(npages);
	physp += (gfn - memslot->base_gfn) & ~(npages - 1);
261
	spin_lock(&kvm->arch.slot_phys_lock);
262 263
	for (i = 0; i < npages; ++i) {
		if (!physp[i]) {
264 265
			physp[i] = ((pfn + i) << PAGE_SHIFT) +
				got + is_io + pgorder;
266 267 268
			got = 0;
		}
	}
269
	spin_unlock(&kvm->arch.slot_phys_lock);
270
	err = 0;
271

272 273 274 275 276 277 278
 out:
	if (got) {
		if (PageHuge(page))
			page = compound_head(page);
		put_page(page);
	}
	return err;
279 280 281 282

 up_err:
	up_read(&current->mm->mmap_sem);
	return err;
283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
}

/*
 * We come here on a H_ENTER call from the guest when
 * we don't have the requested page pinned already.
 */
long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
			long pte_index, unsigned long pteh, unsigned long ptel)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long psize, gpa, gfn;
	struct kvm_memory_slot *memslot;
	long ret;

	psize = hpte_page_size(pteh, ptel);
	if (!psize)
		return H_PARAMETER;

	/* Find the memslot (if any) for this address */
	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
	gfn = gpa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		return H_PARAMETER;
307 308 309
	if (!slot_is_aligned(memslot, psize))
		return H_PARAMETER;
	if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
310 311 312 313 314 315 316 317 318 319 320 321 322 323
		return H_PARAMETER;

	preempt_disable();
	ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
	preempt_enable();
	if (ret == H_TOO_HARD) {
		/* this can't happen */
		pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
		ret = H_RESOURCE;	/* or something */
	}
	return ret;

}

324 325 326 327 328 329
static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
				struct kvmppc_pte *gpte, bool data)
{
	return -ENOENT;
}

330 331 332 333 334 335
void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
			    unsigned long *nb_ret)
{
	struct kvm_memory_slot *memslot;
	unsigned long gfn = gpa >> PAGE_SHIFT;
	struct page *page;
336 337
	unsigned long psize, offset;
	unsigned long pa;
338 339 340 341 342 343 344 345
	unsigned long *physp;

	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		return NULL;
	physp = kvm->arch.slot_phys[memslot->id];
	if (!physp)
		return NULL;
346
	physp += gfn - memslot->base_gfn;
347
	pa = *physp;
348
	if (!pa) {
349
		if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0)
350 351 352
			return NULL;
		pa = *physp;
	}
353 354 355 356 357 358
	page = pfn_to_page(pa >> PAGE_SHIFT);
	psize = PAGE_SIZE;
	if (PageHuge(page)) {
		page = compound_head(page);
		psize <<= compound_order(page);
	}
359
	get_page(page);
360
	offset = gpa & (psize - 1);
361
	if (nb_ret)
362
		*nb_ret = psize - offset;
363 364 365 366 367 368 369 370 371 372 373
	return page_address(page) + offset;
}

void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
{
	struct page *page = virt_to_page(va);

	page = compound_head(page);
	put_page(page);
}

374 375 376 377
void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
{
	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;

378 379 380 381
	if (cpu_has_feature(CPU_FTR_ARCH_206))
		vcpu->arch.slb_nr = 32;		/* POWER7 */
	else
		vcpu->arch.slb_nr = 64;
382 383 384 385 386 387

	mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
	mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;

	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
}