hash_native_64.c 18.6 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
12 13 14

/* Low-level tracing is off; DBG_LOW() below expands to nothing unless DEBUG_LOW is defined. */
#undef DEBUG_LOW

L
Linus Torvalds 已提交
15 16
#include <linux/spinlock.h>
#include <linux/bitops.h>
17
#include <linux/of.h>
L
Linus Torvalds 已提交
18 19 20 21 22 23 24 25 26 27
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
28
#include <asm/udbg.h>
29
#include <asm/kexec.h>
30
#include <asm/ppc-opcode.h>
31

I
Ian Munsie 已提交
32 33
#include <misc/cxl.h>

34 35 36 37 38
/*
 * DBG_LOW(): printf-style trace hook used by the HPTE operations in this
 * file.  It forwards to udbg_printf() when DEBUG_LOW is defined and
 * compiles away to nothing otherwise.
 */
#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif
L
Linus Torvalds 已提交
39

40
/*
 * Bit number, within the first doubleword of an HPTE viewed as an
 * unsigned long, that native_lock_hpte()/native_unlock_hpte() use as the
 * per-entry lock.  The HPTE words are accessed through be64_to_cpu() /
 * cpu_to_be64() in this file, so the bit number differs between
 * big-endian and little-endian builds.
 */
#ifdef __BIG_ENDIAN__
#define HPTE_LOCK_BIT 3
#else
#define HPTE_LOCK_BIT (56+3)
#endif
L
Linus Torvalds 已提交
45

46
/*
 * Serialises global tlbie sequences.  tlbie() and native_flush_hash_range()
 * take it only when the MMU lacks MMU_FTR_LOCKLESS_TLBIE;
 * native_hpte_clear() always holds it around its whole sweep.
 */
DEFINE_RAW_SPINLOCK(native_tlbie_lock);
L
Linus Torvalds 已提交
47

48
static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
49
{
50
	unsigned long va;
51
	unsigned int penc;
52
	unsigned long sllp;
53

54 55 56 57 58 59 60 61 62 63 64 65 66
	/*
	 * We need 14 to 65 bits of va for a tlibe of 4K page
	 * With vpn we ignore the lower VPN_SHIFT bits already.
	 * And top two bits are already ignored because we can
	 * only accomadate 76 bits in a 64 bit vpn with a VPN_SHIFT
	 * of 12.
	 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earler) require the
	 * masking of the top 16 bits.
	 */
67 68 69 70
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
71 72
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
P
Paul Mackerras 已提交
73
		va |= ssize << 8;
74 75 76
		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
		va |= sllp << 5;
77
		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
78
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
79
			     : "memory");
80 81
		break;
	default:
82
		/* We need 14 to 14 + i bits of va */
83
		penc = mmu_psize_defs[psize].penc[apsize];
84
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
85
		va |= penc << 12;
P
Paul Mackerras 已提交
86
		va |= ssize << 8;
87 88 89 90 91 92 93 94
		/*
		 * AVAL bits:
		 * We don't need all the bits, but rest of the bits
		 * must be ignored by the processor.
		 * vpn cover upto 65 bits of va. (0...65) and we need
		 * 58..64 bits of va.
		 */
		va |= (vpn & 0xfe); /* AVAL */
95
		va |= 1; /* L */
96
		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
97
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
98
			     : "memory");
99 100 101 102
		break;
	}
}

103
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
104
{
105
	unsigned long va;
106
	unsigned int penc;
107
	unsigned long sllp;
108

109 110 111 112 113 114 115
	/* VPN_SHIFT can be atmost 12 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64 bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earler) require the
	 * masking of the top 16 bits.
	 */
116 117 118 119
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
120 121
		/* clear out bits after(52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
P
Paul Mackerras 已提交
122
		va |= ssize << 8;
123 124 125
		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
		va |= sllp << 5;
126 127 128 129
		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
			     : : "r"(va) : "memory");
		break;
	default:
130
		/* We need 14 to 14 + i bits of va */
131
		penc = mmu_psize_defs[psize].penc[apsize];
132
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
133
		va |= penc << 12;
P
Paul Mackerras 已提交
134
		va |= ssize << 8;
135 136 137 138 139 140 141 142
		/*
		 * AVAL bits:
		 * We don't need all the bits, but rest of the bits
		 * must be ignored by the processor.
		 * vpn cover upto 65 bits of va. (0...65) and we need
		 * 58..64 bits of va.
		 */
		va |= (vpn & 0xfe);
143
		va |= 1; /* L */
144 145 146 147 148 149 150
		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
			     : : "r"(va) : "memory");
		break;
	}

}

151 152
static inline void tlbie(unsigned long vpn, int psize, int apsize,
			 int ssize, int local)
153
{
I
Ian Munsie 已提交
154
	unsigned int use_local;
155
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
156

I
Ian Munsie 已提交
157 158
	use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use();

159 160 161
	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
162
		raw_spin_lock(&native_tlbie_lock);
163 164
	asm volatile("ptesync": : :"memory");
	if (use_local) {
165
		__tlbiel(vpn, psize, apsize, ssize);
166 167
		asm volatile("ptesync": : :"memory");
	} else {
168
		__tlbie(vpn, psize, apsize, ssize);
169 170 171
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
172
		raw_spin_unlock(&native_tlbie_lock);
173 174
}

175
static inline void native_lock_hpte(struct hash_pte *hptep)
L
Linus Torvalds 已提交
176
{
177
	unsigned long *word = (unsigned long *)&hptep->v;
L
Linus Torvalds 已提交
178 179

	while (1) {
180
		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
L
Linus Torvalds 已提交
181 182 183 184 185 186
			break;
		while(test_bit(HPTE_LOCK_BIT, word))
			cpu_relax();
	}
}

187
static inline void native_unlock_hpte(struct hash_pte *hptep)
L
Linus Torvalds 已提交
188
{
189
	unsigned long *word = (unsigned long *)&hptep->v;
L
Linus Torvalds 已提交
190

191
	clear_bit_unlock(HPTE_LOCK_BIT, word);
L
Linus Torvalds 已提交
192 193
}

194
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
195
			unsigned long pa, unsigned long rflags,
196
			unsigned long vflags, int psize, int apsize, int ssize)
L
Linus Torvalds 已提交
197
{
198
	struct hash_pte *hptep = htab_address + hpte_group;
199
	unsigned long hpte_v, hpte_r;
L
Linus Torvalds 已提交
200 201
	int i;

202
	if (!(vflags & HPTE_V_BOLTED)) {
203
		DBG_LOW("    insert(group=%lx, vpn=%016lx, pa=%016lx,"
204
			" rflags=%lx, vflags=%lx, psize=%d)\n",
205
			hpte_group, vpn, pa, rflags, vflags, psize);
206 207
	}

L
Linus Torvalds 已提交
208
	for (i = 0; i < HPTES_PER_GROUP; i++) {
209
		if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
L
Linus Torvalds 已提交
210 211
			/* retry with lock held */
			native_lock_hpte(hptep);
212
			if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID))
L
Linus Torvalds 已提交
213 214 215 216 217 218 219 220 221 222
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

223 224
	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
225 226 227 228 229

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}
L
Linus Torvalds 已提交
230

231
	hptep->r = cpu_to_be64(hpte_r);
L
Linus Torvalds 已提交
232
	/* Guarantee the second dword is visible before the valid bit */
233
	eieio();
L
Linus Torvalds 已提交
234 235 236 237
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
238
	hptep->v = cpu_to_be64(hpte_v);
L
Linus Torvalds 已提交
239 240 241

	__asm__ __volatile__ ("ptesync" : : : "memory");

242
	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
L
Linus Torvalds 已提交
243 244 245 246
}

static long native_hpte_remove(unsigned long hpte_group)
{
247
	struct hash_pte *hptep;
L
Linus Torvalds 已提交
248 249
	int i;
	int slot_offset;
250
	unsigned long hpte_v;
L
Linus Torvalds 已提交
251

252 253
	DBG_LOW("    remove(group=%lx)\n", hpte_group);

L
Linus Torvalds 已提交
254 255 256 257 258
	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
259
		hpte_v = be64_to_cpu(hptep->v);
L
Linus Torvalds 已提交
260

261
		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
L
Linus Torvalds 已提交
262 263
			/* retry with lock held */
			native_lock_hpte(hptep);
264
			hpte_v = be64_to_cpu(hptep->v);
265 266
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
L
Linus Torvalds 已提交
267 268 269 270 271 272 273 274 275 276 277 278
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Invalidate the hpte. NOTE: this also unlocks it */
279
	hptep->v = 0;
L
Linus Torvalds 已提交
280 281 282 283

	return i;
}

284
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
285 286
				 unsigned long vpn, int bpsize,
				 int apsize, int ssize, int local)
L
Linus Torvalds 已提交
287
{
288
	struct hash_pte *hptep = htab_address + slot;
289 290 291
	unsigned long hpte_v, want_v;
	int ret = 0;

292
	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
293

294 295
	DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
		vpn, want_v & HPTE_V_AVPN, slot, newpp);
296 297 298

	native_lock_hpte(hptep);

299
	hpte_v = be64_to_cpu(hptep->v);
300 301 302 303 304 305 306
	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
	 * random entry from it. When we do that we don't invalidate the TLB
	 * (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
307
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
308 309 310 311 312
		DBG_LOW(" -> miss\n");
		ret = -1;
	} else {
		DBG_LOW(" -> hit\n");
		/* Update the HPTE */
313 314
		hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) |
			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)));
315
	}
316
	native_unlock_hpte(hptep);
317 318

	/* Ensure it is out of the tlb too. */
319
	tlbie(vpn, bpsize, apsize, ssize, local);
320 321

	return ret;
L
Linus Torvalds 已提交
322 323
}

324
static long native_hpte_find(unsigned long vpn, int psize, int ssize)
L
Linus Torvalds 已提交
325
{
326
	struct hash_pte *hptep;
L
Linus Torvalds 已提交
327
	unsigned long hash;
P
Paul Mackerras 已提交
328
	unsigned long i;
L
Linus Torvalds 已提交
329
	long slot;
330
	unsigned long want_v, hpte_v;
L
Linus Torvalds 已提交
331

332
	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
333
	want_v = hpte_encode_avpn(vpn, psize, ssize);
L
Linus Torvalds 已提交
334

P
Paul Mackerras 已提交
335 336 337 338
	/* Bolted mappings are only ever in the primary group */
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + slot;
339
		hpte_v = be64_to_cpu(hptep->v);
L
Linus Torvalds 已提交
340

P
Paul Mackerras 已提交
341 342 343 344
		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* HPTE matches */
			return slot;
		++slot;
L
Linus Torvalds 已提交
345 346 347 348 349 350 351 352 353 354 355 356
	}

	return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
357
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
P
Paul Mackerras 已提交
358
				       int psize, int ssize)
L
Linus Torvalds 已提交
359
{
360 361
	unsigned long vpn;
	unsigned long vsid;
L
Linus Torvalds 已提交
362
	long slot;
363
	struct hash_pte *hptep;
L
Linus Torvalds 已提交
364

P
Paul Mackerras 已提交
365
	vsid = get_kernel_vsid(ea, ssize);
366
	vpn = hpt_vpn(ea, vsid, ssize);
L
Linus Torvalds 已提交
367

368
	slot = native_hpte_find(vpn, psize, ssize);
L
Linus Torvalds 已提交
369 370 371 372
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;

373
	/* Update the HPTE */
374 375 376
	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
			~(HPTE_R_PP | HPTE_R_N)) |
		(newpp & (HPTE_R_PP | HPTE_R_N)));
377 378 379 380 381
	/*
	 * Ensure it is out of the tlb too. Bolted entries base and
	 * actual page size will be same.
	 */
	tlbie(vpn, psize, psize, ssize, 0);
L
Linus Torvalds 已提交
382 383
}

384
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
385
				   int bpsize, int apsize, int ssize, int local)
L
Linus Torvalds 已提交
386
{
387
	struct hash_pte *hptep = htab_address + slot;
388
	unsigned long hpte_v;
389
	unsigned long want_v;
L
Linus Torvalds 已提交
390 391 392 393
	unsigned long flags;

	local_irq_save(flags);

394
	DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
395

396
	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
397
	native_lock_hpte(hptep);
398
	hpte_v = be64_to_cpu(hptep->v);
L
Linus Torvalds 已提交
399

400 401 402 403 404 405 406
	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
	 * random entry from it. When we do that we don't invalidate the TLB
	 * (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
407
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
L
Linus Torvalds 已提交
408
		native_unlock_hpte(hptep);
409
	else
L
Linus Torvalds 已提交
410
		/* Invalidate the hpte. NOTE: this also unlocks it */
411
		hptep->v = 0;
L
Linus Torvalds 已提交
412

413
	/* Invalidate the TLB */
414 415
	tlbie(vpn, bpsize, apsize, ssize, local);

L
Linus Torvalds 已提交
416 417 418
	local_irq_restore(flags);
}

419 420
static void native_hugepage_invalidate(unsigned long vsid,
				       unsigned long addr,
421
				       unsigned char *hpte_slot_array,
422
				       int psize, int ssize)
423
{
424
	int i;
425 426 427 428 429
	struct hash_pte *hptep;
	int actual_psize = MMU_PAGE_16M;
	unsigned int max_hpte_count, valid;
	unsigned long flags, s_addr = addr;
	unsigned long hpte_v, want_v, shift;
430
	unsigned long hidx, vpn = 0, hash, slot;
431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454

	shift = mmu_psize_defs[psize].shift;
	max_hpte_count = 1U << (PMD_SHIFT - shift);

	local_irq_save(flags);
	for (i = 0; i < max_hpte_count; i++) {
		valid = hpte_valid(hpte_slot_array, i);
		if (!valid)
			continue;
		hidx =  hpte_hash_index(hpte_slot_array, i);

		/* get the vpn */
		addr = s_addr + (i * (1ul << shift));
		vpn = hpt_vpn(addr, vsid, ssize);
		hash = hpt_hash(vpn, shift, ssize);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;

		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;

		hptep = htab_address + slot;
		want_v = hpte_encode_avpn(vpn, psize, ssize);
		native_lock_hpte(hptep);
455
		hpte_v = be64_to_cpu(hptep->v);
456 457 458 459 460 461 462

		/* Even if we miss, we need to invalidate the TLB */
		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
			native_unlock_hpte(hptep);
		else
			/* Invalidate the hpte. NOTE: this also unlocks it */
			hptep->v = 0;
463 464 465 466 467 468
		/*
		 * We need to do tlb invalidate for all the address, tlbie
		 * instruction compares entry_VA in tlb with the VA specified
		 * here
		 */
		tlbie(vpn, psize, actual_psize, ssize, 0);
469 470 471 472
	}
	local_irq_restore(flags);
}

473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502
/*
 * Given the LP field @lp of an HPTE and a candidate base page size
 * @psize, find the actual page size whose penc encoding for that base
 * size matches @lp.
 *
 * Returns the matching index into mmu_psize_defs[], or -1 if no actual
 * page size matches for this base size.
 */
static inline int __hpte_actual_psize(unsigned int lp, int psize)
{
	int cand;

	/* MMU_PAGE_4K (index 0) is never a candidate, so begin at 1 */
	for (cand = 1; cand < MMU_PAGE_COUNT; cand++) {
		int nbits;
		unsigned int lp_mask;

		/* only consider combinations that have a valid penc */
		if (mmu_psize_defs[psize].penc[cand] != -1) {
			/*
			 * encoding bits per actual page size
			 *        PTE LP     actual page size
			 *    rrrr rrrz		>=8KB
			 *    rrrr rrzz		>=16KB
			 *    rrrr rzzz		>=32KB
			 *    rrrr zzzz		>=64KB
			 * .......
			 */
			nbits = mmu_psize_defs[cand].shift - LP_SHIFT;
			nbits = nbits > LP_BITS ? LP_BITS : nbits;
			lp_mask = (1 << nbits) - 1;
			if ((lp & lp_mask) == mmu_psize_defs[psize].penc[cand])
				return cand;
		}
	}
	return -1;
}

503
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
504
			int *psize, int *apsize, int *ssize, unsigned long *vpn)
505
{
506
	unsigned long avpn, pteg, vpi;
507 508
	unsigned long hpte_v = be64_to_cpu(hpte->v);
	unsigned long hpte_r = be64_to_cpu(hpte->r);
509
	unsigned long vsid, seg_off;
510 511
	int size, a_size, shift;
	/* Look at the 8 bit LP value */
512
	unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
513

514 515 516 517
	if (!(hpte_v & HPTE_V_LARGE)) {
		size   = MMU_PAGE_4K;
		a_size = MMU_PAGE_4K;
	} else {
518
		for (size = 0; size < MMU_PAGE_COUNT; size++) {
519

520 521 522
			/* valid entries have a shift value */
			if (!mmu_psize_defs[size].shift)
				continue;
523

524 525 526
			a_size = __hpte_actual_psize(lp, size);
			if (a_size != -1)
				break;
527 528
		}
	}
529
	/* This works for all page sizes, and for 256M and 1T segments */
530
	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
531 532
	shift = mmu_psize_defs[size].shift;

533 534 535 536 537 538 539 540 541 542 543 544 545 546 547
	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
	pteg = slot / HPTES_PER_GROUP;
	if (hpte_v & HPTE_V_SECONDARY)
		pteg = ~pteg;

	switch (*ssize) {
	case MMU_SEGSIZE_256M:
		/* We only have 28 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1f) << 23;
		vsid    =  avpn >> 5;
		/* We can find more bits from the pteg value */
		if (shift < 23) {
			vpi = (vsid ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
548
		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
549
		break;
550 551 552 553 554
	case MMU_SEGSIZE_1T:
		/* We only have 40 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1ffff) << 23;
		vsid    = avpn >> 17;
		if (shift < 23) {
555
			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
556
			seg_off |= vpi << shift;
557
		}
558
		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
559
		break;
560
	default:
561
		*vpn = size = 0;
562
	}
563 564
	*psize  = size;
	*apsize = a_size;
565 566
}

567 568 569 570 571 572 573 574 575 576
/*
 * clear all mappings on kexec.  All cpus are in real mode (or they will
 * be when they isi), and we are the only one left.  We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * TODO: add batching support when enabled.  remember, no dynamic memory here,
 * athough there is the control page available...
 */
static void native_hpte_clear(void)
{
577
	unsigned long vpn = 0;
578
	unsigned long slot, slots, flags;
579
	struct hash_pte *hptep = htab_address;
580
	unsigned long hpte_v;
581
	unsigned long pteg_count;
582
	int psize, apsize, ssize;
583 584 585 586 587 588 589 590

	pteg_count = htab_hash_mask + 1;

	local_irq_save(flags);

	/* we take the tlbie lock and hold it.  Some hardware will
	 * deadlock if we try to tlbie from two processors at once.
	 */
591
	raw_spin_lock(&native_tlbie_lock);
592 593 594 595 596 597 598 599 600

	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running,  right?  and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
601
		hpte_v = be64_to_cpu(hptep->v);
602

603 604 605 606
		/*
		 * Call __tlbie() here rather than tlbie() since we
		 * already hold the native_tlbie_lock.
		 */
607
		if (hpte_v & HPTE_V_VALID) {
608
			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
609
			hptep->v = 0;
610
			__tlbie(vpn, psize, apsize, ssize);
611 612 613
		}
	}

614
	asm volatile("eieio; tlbsync; ptesync":::"memory");
615
	raw_spin_unlock(&native_tlbie_lock);
616 617 618
	local_irq_restore(flags);
}

619 620 621 622
/*
 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
 * the lock all the time
 */
623
static void native_flush_hash_range(unsigned long number, int local)
L
Linus Torvalds 已提交
624
{
625 626
	unsigned long vpn;
	unsigned long hash, index, hidx, shift, slot;
627
	struct hash_pte *hptep;
628
	unsigned long hpte_v;
629 630 631
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
L
Linus Torvalds 已提交
632
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
633
	unsigned long psize = batch->psize;
P
Paul Mackerras 已提交
634
	int ssize = batch->ssize;
635
	int i;
L
Linus Torvalds 已提交
636 637 638 639

	local_irq_save(flags);

	for (i = 0; i < number; i++) {
640
		vpn = batch->vpn[i];
641 642
		pte = batch->pte[i];

643 644
		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			hash = hpt_hash(vpn, shift, ssize);
645 646 647 648 649 650
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
651
			want_v = hpte_encode_avpn(vpn, psize, ssize);
652
			native_lock_hpte(hptep);
653
			hpte_v = be64_to_cpu(hptep->v);
654 655 656 657 658 659
			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
			    !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else
				hptep->v = 0;
		} pte_iterate_hashed_end();
L
Linus Torvalds 已提交
660 661
	}

662
	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
663
	    mmu_psize_defs[psize].tlbiel && local) {
L
Linus Torvalds 已提交
664
		asm volatile("ptesync":::"memory");
665
		for (i = 0; i < number; i++) {
666
			vpn = batch->vpn[i];
667 668
			pte = batch->pte[i];

669 670
			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
671
				__tlbiel(vpn, psize, psize, ssize);
672 673
			} pte_iterate_hashed_end();
		}
L
Linus Torvalds 已提交
674 675
		asm volatile("ptesync":::"memory");
	} else {
676
		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
L
Linus Torvalds 已提交
677 678

		if (lock_tlbie)
679
			raw_spin_lock(&native_tlbie_lock);
L
Linus Torvalds 已提交
680 681

		asm volatile("ptesync":::"memory");
682
		for (i = 0; i < number; i++) {
683
			vpn = batch->vpn[i];
684 685
			pte = batch->pte[i];

686 687
			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
688
				__tlbie(vpn, psize, psize, ssize);
689 690
			} pte_iterate_hashed_end();
		}
L
Linus Torvalds 已提交
691 692 693
		asm volatile("eieio; tlbsync; ptesync":::"memory");

		if (lock_tlbie)
694
			raw_spin_unlock(&native_tlbie_lock);
L
Linus Torvalds 已提交
695 696 697 698 699
	}

	local_irq_restore(flags);
}

700
/*
 * Install the native (bare-metal) hash page table operations defined in
 * this file into the ppc_md machine descriptor.
 */
void __init hpte_init_native(void)
{
	ppc_md.hpte_invalidate	= native_hpte_invalidate;
	ppc_md.hpte_updatepp	= native_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
	ppc_md.hpte_insert	= native_hpte_insert;
	ppc_md.hpte_remove	= native_hpte_remove;
	ppc_md.hpte_clear_all	= native_hpte_clear;
	ppc_md.flush_hash_range = native_flush_hash_range;
	ppc_md.hugepage_invalidate   = native_hugepage_invalidate;
}