/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

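/*
 * The HPTE lock is a software bit in the first doubleword of the HPTE.
 * The HPTE itself is stored big-endian while the generic bitops used
 * below operate on native-endian longs, so on little-endian kernels the
 * same physical bit is found 56 bits higher.
 */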
#ifdef __BIG_ENDIAN__
#define HPTE_LOCK_BIT 3
#else
#define HPTE_LOCK_BIT (56+3)
#endif

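/*
 * tlbie is not safe to issue concurrently on all hardware; when
 * MMU_FTR_LOCKLESS_TLBIE is not advertised this lock serialises global
 * invalidations (see also the comment in native_hpte_clear()).
 */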
DEFINE_RAW_SPINLOCK(native_tlbie_lock);

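/*
 * Issue a global (broadcast) tlbie for one virtual page. @psize is the
 * base page size used to look up the penc encoding, @apsize the actual
 * page size and @ssize the segment size. Callers provide the required
 * ptesync/eieio/tlbsync ordering and, on hardware without
 * MMU_FTR_LOCKLESS_TLBIE, hold native_tlbie_lock (see tlbie() below).
 */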
static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/*
	 * We need 14 to 65 bits of va for a tlbie of a 4K page.
	 * With vpn we ignore the lower VPN_SHIFT bits already.
	 * And the top two bits are already ignored because we can
	 * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
	 * of 12.
	 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
		va |= sllp << 5;
		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/* Add AVAL part */
		if (psize != apsize) {
			/*
			 * MPSS, 64K base page size and 16MB large page size.
			 * We don't need all the bits, but the rest of the bits
			 * must be ignored by the processor.
			 * vpn covers up to 65 bits of va (0...65) and we need
			 * 58..64 bits of va.
			 */
			va |= (vpn & 0xfe);
		}
		va |= 1; /* L */
		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	}
}

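/*
 * Local form of the above: tlbiel only invalidates this CPU's TLB. The
 * instruction is hand-encoded as a raw .long, presumably so the file
 * still assembles with older toolchains that lack the tlbiel mnemonic.
 */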
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/* VPN_SHIFT can be at most 12 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64 bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
		va |= sllp << 5;
		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
			     : : "r"(va) : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/* Add AVAL part */
		if (psize != apsize) {
			/*
			 * MPSS, 64K base page size and 16MB large page size.
			 * We don't need all the bits, but the rest of the bits
			 * must be ignored by the processor.
			 * vpn covers up to 65 bits of va (0...65) and we need
			 * 58..64 bits of va.
			 */
			va |= (vpn & 0xfe);
		}
		va |= 1; /* L */
		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
			     : : "r"(va) : "memory");
		break;
	}

}

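/*
 * Flush @vpn from the TLB: use tlbiel when the flush is local and the
 * page size supports it, otherwise fall back to a broadcast tlbie,
 * taking native_tlbie_lock when the hardware requires serialisation.
 */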
static inline void tlbie(unsigned long vpn, int psize, int apsize,
			 int ssize, int local)
{
	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
		raw_spin_lock(&native_tlbie_lock);
	asm volatile("ptesync": : :"memory");
	if (use_local) {
		__tlbiel(vpn, psize, apsize, ssize);
		asm volatile("ptesync": : :"memory");
	} else {
		__tlbie(vpn, psize, apsize, ssize);
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
		raw_spin_unlock(&native_tlbie_lock);
}

static inline void native_lock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = (unsigned long *)&hptep->v;

	while (1) {
		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
			break;
		while(test_bit(HPTE_LOCK_BIT, word))
			cpu_relax();
	}
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = (unsigned long *)&hptep->v;

	clear_bit_unlock(HPTE_LOCK_BIT, word);
}

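/*
 * Try to install an HPTE in @hpte_group. Returns -1 if the group is
 * full, otherwise the slot index within the group, with bit 3 set when
 * the entry was inserted via the secondary hash (HPTE_V_SECONDARY).
 */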
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
			unsigned long pa, unsigned long rflags,
			unsigned long vflags, int psize, int apsize, int ssize)
{
	struct hash_pte *hptep = htab_address + hpte_group;
	unsigned long hpte_v, hpte_r;
	int i;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW("    insert(group=%lx, vpn=%016lx, pa=%016lx,"
			" rflags=%lx, vflags=%lx, psize=%d)\n",
			hpte_group, vpn, pa, rflags, vflags, psize);
	}

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID))
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}

	hptep->r = cpu_to_be64(hpte_r);
	/* Guarantee the second dword is visible before the valid bit */
	eieio();
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
	hptep->v = cpu_to_be64(hpte_v);

	__asm__ __volatile__ ("ptesync" : : : "memory");

	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

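/*
 * Evict one non-bolted entry from a full group, starting the search at
 * a pseudo-random offset taken from the timebase. Returns the slot
 * offset within the group, or -1 if every entry is bolted. No TLB
 * invalidation is done here; see the comments in native_hpte_updatepp()
 * and native_hpte_invalidate().
 */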
static long native_hpte_remove(unsigned long hpte_group)
{
	struct hash_pte *hptep;
	int i;
	int slot_offset;
	unsigned long hpte_v;

	DBG_LOW("    remove(group=%lx)\n", hpte_group);

	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
		hpte_v = be64_to_cpu(hptep->v);

		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			hpte_v = be64_to_cpu(hptep->v);
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Invalidate the hpte. NOTE: this also unlocks it */
	hptep->v = 0;

	return i;
}

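/*
 * Update the protection bits of the HPTE at @slot, provided it still
 * maps @vpn. Returns 0 on a hit and -1 on a miss; the TLB entry is
 * flushed in either case, for the reason explained below.
 */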
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
				 unsigned long vpn, int bpsize,
				 int apsize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v, want_v;
	int ret = 0;

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);

	DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
		vpn, want_v & HPTE_V_AVPN, slot, newpp);

	native_lock_hpte(hptep);

	hpte_v = be64_to_cpu(hptep->v);
	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
	 * random entry from it. When we do that we don't invalidate the TLB
	 * (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
		DBG_LOW(" -> miss\n");
		ret = -1;
	} else {
		DBG_LOW(" -> hit\n");
		/* Update the HPTE */
		hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) |
			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)));
	}
	native_unlock_hpte(hptep);

	/* Ensure it is out of the tlb too. */
	tlbie(vpn, bpsize, apsize, ssize, local);

	return ret;
}

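/*
 * Look up the (bolted) HPTE mapping @vpn in the primary hash group and
 * return its global slot number, or -1 if no entry matches.
 */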
static long native_hpte_find(unsigned long vpn, int psize, int ssize)
{
	struct hash_pte *hptep;
	unsigned long hash;
	unsigned long i;
	long slot;
	unsigned long want_v, hpte_v;

	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
	want_v = hpte_encode_avpn(vpn, psize, ssize);

	/* Bolted mappings are only ever in the primary group */
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + slot;
		hpte_v = be64_to_cpu(hptep->v);

		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* HPTE matches */
			return slot;
		++slot;
	}

	return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
				       int psize, int ssize)
{
	unsigned long vpn;
	unsigned long vsid;
	long slot;
	struct hash_pte *hptep;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = native_hpte_find(vpn, psize, ssize);
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;

	/* Update the HPTE */
	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
			~(HPTE_R_PP | HPTE_R_N)) |
		(newpp & (HPTE_R_PP | HPTE_R_N)));
	/*
	 * Ensure it is out of the tlb too. Bolted entries base and
	 * actual page size will be same.
	 */
	tlbie(vpn, psize, psize, ssize, 0);
}

static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
				   int bpsize, int apsize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;

	local_irq_save(flags);

	DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
	native_lock_hpte(hptep);
	hpte_v = be64_to_cpu(hptep->v);

	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
	 * random entry from it. When we do that we don't invalidate the TLB
	 * (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
		native_unlock_hpte(hptep);
	else
		/* Invalidate the hpte. NOTE: this also unlocks it */
		hptep->v = 0;

	/* Invalidate the TLB */
	tlbie(vpn, bpsize, apsize, ssize, local);

	local_irq_restore(flags);
}

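/*
 * Invalidate every HPTE backing a huge page mapped with MPSS, e.g. a
 * 16MB page backed by 64K base-page-size HPTEs. hpte_slot_array records,
 * for each base-page-size chunk, whether an HPTE is valid and which hash
 * slot it went into; a single tlbie at the 16MB actual page size at the
 * end is enough to flush the whole range.
 */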
static void native_hugepage_invalidate(struct mm_struct *mm,
				       unsigned char *hpte_slot_array,
				       unsigned long addr, int psize)
{
	int ssize = 0, i;
	int lock_tlbie;
	struct hash_pte *hptep;
	int actual_psize = MMU_PAGE_16M;
	unsigned int max_hpte_count, valid;
	unsigned long flags, s_addr = addr;
	unsigned long hpte_v, want_v, shift;
	unsigned long hidx, vpn = 0, vsid, hash, slot;

	shift = mmu_psize_defs[psize].shift;
	max_hpte_count = 1U << (PMD_SHIFT - shift);

	local_irq_save(flags);
	for (i = 0; i < max_hpte_count; i++) {
		valid = hpte_valid(hpte_slot_array, i);
		if (!valid)
			continue;
		hidx =  hpte_hash_index(hpte_slot_array, i);

		/* get the vpn */
		addr = s_addr + (i * (1ul << shift));
		if (!is_kernel_addr(addr)) {
			ssize = user_segment_size(addr);
			vsid = get_vsid(mm->context.id, addr, ssize);
			WARN_ON(vsid == 0);
		} else {
			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
			ssize = mmu_kernel_ssize;
		}

		vpn = hpt_vpn(addr, vsid, ssize);
		hash = hpt_hash(vpn, shift, ssize);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;

		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;

		hptep = htab_address + slot;
		want_v = hpte_encode_avpn(vpn, psize, ssize);
		native_lock_hpte(hptep);
		hpte_v = be64_to_cpu(hptep->v);

		/* Even if we miss, we need to invalidate the TLB */
		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
			native_unlock_hpte(hptep);
		else
			/* Invalidate the hpte. NOTE: this also unlocks it */
			hptep->v = 0;
	}
	/*
	 * Since this is a hugepage, we just need a single tlbie.
	 * use the last vpn.
	 */
	lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
	if (lock_tlbie)
		raw_spin_lock(&native_tlbie_lock);

	asm volatile("ptesync":::"memory");
	__tlbie(vpn, psize, actual_psize, ssize);
	asm volatile("eieio; tlbsync; ptesync":::"memory");

	if (lock_tlbie)
		raw_spin_unlock(&native_tlbie_lock);

	local_irq_restore(flags);
}

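/*
 * Given the 8-bit LP field of an HPTE and the base page size it was
 * hashed with, return the actual page size it encodes (an MMU_PAGE_*
 * index), or -1 if the LP value matches no penc for that base size.
 */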
static inline int __hpte_actual_psize(unsigned int lp, int psize)
{
	int i, shift;
	unsigned int mask;

	/* start from 1 ignoring MMU_PAGE_4K */
	for (i = 1; i < MMU_PAGE_COUNT; i++) {

		/* invalid penc */
		if (mmu_psize_defs[psize].penc[i] == -1)
			continue;
		/*
		 * encoding bits per actual page size
		 *        PTE LP     actual page size
		 *    rrrr rrrz		>=8KB
		 *    rrrr rrzz		>=16KB
		 *    rrrr rzzz		>=32KB
		 *    rrrr zzzz		>=64KB
		 * .......
		 */
		shift = mmu_psize_defs[i].shift - LP_SHIFT;
		if (shift > LP_BITS)
			shift = LP_BITS;
		mask = (1 << shift) - 1;
		if ((lp & mask) == mmu_psize_defs[psize].penc[i])
			return i;
	}
	return -1;
}

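/*
 * Work back from a raw HPTE and its slot number to the vpn, base and
 * actual page sizes, and segment size it maps. Only used by
 * native_hpte_clear(), where the hardware entry is all we have to go on.
 */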
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
			int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
	unsigned long avpn, pteg, vpi;
	unsigned long hpte_v = be64_to_cpu(hpte->v);
	unsigned long hpte_r = be64_to_cpu(hpte->r);
	unsigned long vsid, seg_off;
	int size, a_size, shift;
	/* Look at the 8 bit LP value */
	unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	if (!(hpte_v & HPTE_V_LARGE)) {
		size   = MMU_PAGE_4K;
		a_size = MMU_PAGE_4K;
	} else {
		for (size = 0; size < MMU_PAGE_COUNT; size++) {

			/* valid entries have a shift value */
			if (!mmu_psize_defs[size].shift)
				continue;

			a_size = __hpte_actual_psize(lp, size);
			if (a_size != -1)
				break;
		}
	}
	/* This works for all page sizes, and for 256M and 1T segments */
	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
	shift = mmu_psize_defs[size].shift;

	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
	pteg = slot / HPTES_PER_GROUP;
	if (hpte_v & HPTE_V_SECONDARY)
		pteg = ~pteg;

	switch (*ssize) {
	case MMU_SEGSIZE_256M:
		/* We only have 28 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1f) << 23;
		vsid    =  avpn >> 5;
		/* We can find more bits from the pteg value */
		if (shift < 23) {
			vpi = (vsid ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	case MMU_SEGSIZE_1T:
		/* We only have 40 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1ffff) << 23;
		vsid    = avpn >> 17;
		if (shift < 23) {
			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	default:
		*vpn = size = 0;
	}
	*psize  = size;
	*apsize = a_size;
}

/*
 * clear all mappings on kexec.  All cpus are in real mode (or they will
 * be when they isi), and we are the only one left.  We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * TODO: add batching support when enabled.  remember, no dynamic memory here,
 * although there is the control page available...
 */
static void native_hpte_clear(void)
{
	unsigned long vpn = 0;
	unsigned long slot, slots, flags;
	struct hash_pte *hptep = htab_address;
	unsigned long hpte_v;
	unsigned long pteg_count;
	int psize, apsize, ssize;

	pteg_count = htab_hash_mask + 1;

	local_irq_save(flags);

	/* we take the tlbie lock and hold it.  Some hardware will
	 * deadlock if we try to tlbie from two processors at once.
	 */
	raw_spin_lock(&native_tlbie_lock);

	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running,  right?  and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
		hpte_v = be64_to_cpu(hptep->v);

		/*
		 * Call __tlbie() here rather than tlbie() since we
		 * already hold the native_tlbie_lock.
		 */
		if (hpte_v & HPTE_V_VALID) {
			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
			hptep->v = 0;
			__tlbie(vpn, psize, apsize, ssize);
		}
	}

	asm volatile("eieio; tlbsync; ptesync":::"memory");
	raw_spin_unlock(&native_tlbie_lock);
	local_irq_restore(flags);
}

/*
 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
 * the lock all the time
 */
static void native_flush_hash_range(unsigned long number, int local)
{
	unsigned long vpn;
	unsigned long hash, index, hidx, shift, slot;
	struct hash_pte *hptep;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
	unsigned long psize = batch->psize;
	int ssize = batch->ssize;
	int i;

	local_irq_save(flags);

	for (i = 0; i < number; i++) {
		vpn = batch->vpn[i];
		pte = batch->pte[i];

		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			hash = hpt_hash(vpn, shift, ssize);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
			want_v = hpte_encode_avpn(vpn, psize, ssize);
			native_lock_hpte(hptep);
			hpte_v = be64_to_cpu(hptep->v);
			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
			    !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else
				hptep->v = 0;
		} pte_iterate_hashed_end();
	}

	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
	    mmu_psize_defs[psize].tlbiel && local) {
		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbiel(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("ptesync":::"memory");
	} else {
		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

		if (lock_tlbie)
			raw_spin_lock(&native_tlbie_lock);

		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbie(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("eieio; tlbsync; ptesync":::"memory");

		if (lock_tlbie)
			raw_spin_unlock(&native_tlbie_lock);
	}

	local_irq_restore(flags);
}

void __init hpte_init_native(void)
{
	ppc_md.hpte_invalidate	= native_hpte_invalidate;
	ppc_md.hpte_updatepp	= native_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
	ppc_md.hpte_insert	= native_hpte_insert;
	ppc_md.hpte_remove	= native_hpte_remove;
	ppc_md.hpte_clear_all	= native_hpte_clear;
	ppc_md.flush_hash_range = native_flush_hash_range;
	ppc_md.hugepage_invalidate   = native_hugepage_invalidate;
}