/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#ifdef __BIG_ENDIAN__
#define HPTE_LOCK_BIT 3
#else
#define HPTE_LOCK_BIT (56+3)
#endif

DEFINE_RAW_SPINLOCK(native_tlbie_lock);

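/*
 * Build the (va, page size) operand for a global tlbie from the vpn and
 * the base/actual page size encodings, then issue the instruction.
 * Callers serialize on native_tlbie_lock when the hardware cannot handle
 * concurrent tlbie.
 */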
static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/*
	 * We need 14 to 65 bits of va for a tlbie of 4K page
	 * With vpn we ignore the lower VPN_SHIFT bits already.
	 * And top two bits are already ignored because we can
	 * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
	 * of 12.
	 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
		va |= sllp << 5;
		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/*
		 * AVAL bits:
		 * We don't need all the bits, but rest of the bits
		 * must be ignored by the processor.
		 * vpn covers up to 65 bits of va. (0...65) and we need
		 * 58..64 bits of va.
		 */
		va |= (vpn & 0xfe); /* AVAL */
		va |= 1; /* L */
		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	}
}

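/*
 * Local form of __tlbie: the operand is encoded the same way, but tlbiel
 * only invalidates the issuing CPU's TLB, so no global serialization is
 * required.
 */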
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/* VPN_SHIFT can be at most 12 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64 bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
		va |= sllp << 5;
		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
			     : : "r"(va) : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/*
		 * AVAL bits:
		 * We don't need all the bits, but rest of the bits
		 * must be ignored by the processor.
		 * vpn covers up to 65 bits of va. (0...65) and we need
		 * 58..64 bits of va.
		 */
		va |= (vpn & 0xfe);
		va |= 1; /* L */
		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
			     : : "r"(va) : "memory");
		break;
	}

}

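/*
 * Pick the right invalidation for an HPTE: tlbiel when the flush is local
 * and the page size supports it, otherwise a global tlbie, taking
 * native_tlbie_lock on hardware without lockless tlbie.
 */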
static inline void tlbie(unsigned long vpn, int psize, int apsize,
			 int ssize, int local)
{
	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
		raw_spin_lock(&native_tlbie_lock);
	asm volatile("ptesync": : :"memory");
	if (use_local) {
		__tlbiel(vpn, psize, apsize, ssize);
		asm volatile("ptesync": : :"memory");
	} else {
		__tlbie(vpn, psize, apsize, ssize);
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
		raw_spin_unlock(&native_tlbie_lock);
}

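/*
 * Per-HPTE locking uses a software lock bit (HPTE_LOCK_BIT) in the first
 * doubleword of the entry; native_lock_hpte() spins until it is acquired.
 */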
static inline void native_lock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = (unsigned long *)&hptep->v;

	while (1) {
		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
			break;
		while(test_bit(HPTE_LOCK_BIT, word))
			cpu_relax();
	}
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = (unsigned long *)&hptep->v;

	clear_bit_unlock(HPTE_LOCK_BIT, word);
}

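/*
 * Insert a new HPTE into the given hash group. Returns the slot number
 * within the group (with bit 3 set if the secondary hash was used), or
 * -1 if the group is full.
 */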
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
			unsigned long pa, unsigned long rflags,
			unsigned long vflags, int psize, int apsize, int ssize)
{
	struct hash_pte *hptep = htab_address + hpte_group;
	unsigned long hpte_v, hpte_r;
	int i;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW("    insert(group=%lx, vpn=%016lx, pa=%016lx,"
			" rflags=%lx, vflags=%lx, psize=%d)\n",
			hpte_group, vpn, pa, rflags, vflags, psize);
	}

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID))
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}

	hptep->r = cpu_to_be64(hpte_r);
	/* Guarantee the second dword is visible before the valid bit */
	eieio();
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
	hptep->v = cpu_to_be64(hpte_v);

	__asm__ __volatile__ ("ptesync" : : : "memory");

	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

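/*
 * Evict one non-bolted entry from a full hash group, starting from a
 * pseudo-random offset. Returns a non-negative value on success or -1 if
 * every entry is bolted. Note that no TLB invalidation is done here; the
 * stale translation remains architecturally valid until flushed.
 */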
static long native_hpte_remove(unsigned long hpte_group)
{
	struct hash_pte *hptep;
	int i;
	int slot_offset;
	unsigned long hpte_v;

	DBG_LOW("    remove(group=%lx)\n", hpte_group);

	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
		hpte_v = be64_to_cpu(hptep->v);

		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			hpte_v = be64_to_cpu(hptep->v);
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Invalidate the hpte. NOTE: this also unlocks it */
	hptep->v = 0;

	return i;
}

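/*
 * Update the protection bits of the HPTE at @slot if it still matches the
 * expected AVPN. Returns 0 on success, -1 if the entry has been evicted;
 * the TLB entry is flushed in either case (see the comment below).
 */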
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
				 unsigned long vpn, int bpsize,
				 int apsize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v, want_v;
	int ret = 0;

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);

	DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
		vpn, want_v & HPTE_V_AVPN, slot, newpp);

	native_lock_hpte(hptep);

	hpte_v = be64_to_cpu(hptep->v);
	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
	 * random entry from it. When we do that we don't invalidate the TLB
	 * (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
		DBG_LOW(" -> miss\n");
		ret = -1;
	} else {
		DBG_LOW(" -> hit\n");
		/* Update the HPTE */
		hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) |
			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)));
	}
	native_unlock_hpte(hptep);

	/* Ensure it is out of the tlb too. */
	tlbie(vpn, bpsize, apsize, ssize, local);

	return ret;
}

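/*
 * Find the hash table slot of a bolted kernel mapping. Only the primary
 * group is searched because bolted entries never land in the secondary
 * group. Returns the slot or -1 if not found.
 */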
static long native_hpte_find(unsigned long vpn, int psize, int ssize)
{
	struct hash_pte *hptep;
	unsigned long hash;
	unsigned long i;
	long slot;
	unsigned long want_v, hpte_v;

	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
	want_v = hpte_encode_avpn(vpn, psize, ssize);

	/* Bolted mappings are only ever in the primary group */
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + slot;
		hpte_v = be64_to_cpu(hptep->v);

		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* HPTE matches */
			return slot;
		++slot;
	}

	return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
				       int psize, int ssize)
{
	unsigned long vpn;
	unsigned long vsid;
	long slot;
	struct hash_pte *hptep;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = native_hpte_find(vpn, psize, ssize);
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;

	/* Update the HPTE */
	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
			~(HPTE_R_PP | HPTE_R_N)) |
		(newpp & (HPTE_R_PP | HPTE_R_N)));
	/*
	 * Ensure it is out of the tlb too. Bolted entries base and
	 * actual page size will be same.
	 */
	tlbie(vpn, psize, psize, ssize, 0);
}

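/*
 * Invalidate the HPTE at @slot if it still matches the expected AVPN and
 * flush the corresponding TLB entry. Runs with interrupts disabled for
 * the duration of the update.
 */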
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
				   int bpsize, int apsize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;

	local_irq_save(flags);

	DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
	native_lock_hpte(hptep);
	hpte_v = be64_to_cpu(hptep->v);

	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
	 * random entry from it. When we do that we don't invalidate the TLB
	 * (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
		native_unlock_hpte(hptep);
	else
		/* Invalidate the hpte. NOTE: this also unlocks it */
		hptep->v = 0;

	/* Invalidate the TLB */
	tlbie(vpn, bpsize, apsize, ssize, local);

	local_irq_restore(flags);
}

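/*
 * Invalidate all the HPTEs that back a 16M hugepage mapping. The
 * per-hugepage slot array gives the hash slot of each underlying entry;
 * a single tlbie at the hugepage size then flushes the TLB.
 */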
static void native_hugepage_invalidate(unsigned long vsid,
				       unsigned long addr,
				       unsigned char *hpte_slot_array,
				       int psize, int ssize)
{
	int i, lock_tlbie;
	struct hash_pte *hptep;
	int actual_psize = MMU_PAGE_16M;
	unsigned int max_hpte_count, valid;
	unsigned long flags, s_addr = addr;
	unsigned long hpte_v, want_v, shift;
	unsigned long hidx, vpn = 0, hash, slot;

	shift = mmu_psize_defs[psize].shift;
	max_hpte_count = 1U << (PMD_SHIFT - shift);

	local_irq_save(flags);
	for (i = 0; i < max_hpte_count; i++) {
		valid = hpte_valid(hpte_slot_array, i);
		if (!valid)
			continue;
		hidx =  hpte_hash_index(hpte_slot_array, i);

		/* get the vpn */
		addr = s_addr + (i * (1ul << shift));
		vpn = hpt_vpn(addr, vsid, ssize);
		hash = hpt_hash(vpn, shift, ssize);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;

		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;

		hptep = htab_address + slot;
		want_v = hpte_encode_avpn(vpn, psize, ssize);
		native_lock_hpte(hptep);
		hpte_v = be64_to_cpu(hptep->v);

		/* Even if we miss, we need to invalidate the TLB */
		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
			native_unlock_hpte(hptep);
		else
			/* Invalidate the hpte. NOTE: this also unlocks it */
			hptep->v = 0;
	}
	/*
	 * Since this is a hugepage, we just need a single tlbie.
	 * use the last vpn.
	 */
	lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
	if (lock_tlbie)
		raw_spin_lock(&native_tlbie_lock);

	asm volatile("ptesync":::"memory");
	__tlbie(vpn, psize, actual_psize, ssize);
	asm volatile("eieio; tlbsync; ptesync":::"memory");

	if (lock_tlbie)
		raw_spin_unlock(&native_tlbie_lock);

	local_irq_restore(flags);
}

static inline int __hpte_actual_psize(unsigned int lp, int psize)
{
	int i, shift;
	unsigned int mask;

	/* start from 1 ignoring MMU_PAGE_4K */
	for (i = 1; i < MMU_PAGE_COUNT; i++) {

		/* invalid penc */
		if (mmu_psize_defs[psize].penc[i] == -1)
			continue;
		/*
		 * encoding bits per actual page size
		 *        PTE LP     actual page size
		 *    rrrr rrrz		>=8KB
		 *    rrrr rrzz		>=16KB
		 *    rrrr rzzz		>=32KB
		 *    rrrr zzzz		>=64KB
		 * .......
		 */
		shift = mmu_psize_defs[i].shift - LP_SHIFT;
		if (shift > LP_BITS)
			shift = LP_BITS;
		mask = (1 << shift) - 1;
		if ((lp & mask) == mmu_psize_defs[psize].penc[i])
			return i;
	}
	return -1;
}

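/*
 * Recover the base page size, actual page size, segment size and vpn
 * from a raw HPTE. Used by native_hpte_clear(), which walks the hash
 * table without any Linux PTE to refer back to.
 */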
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
			int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
	unsigned long avpn, pteg, vpi;
	unsigned long hpte_v = be64_to_cpu(hpte->v);
	unsigned long hpte_r = be64_to_cpu(hpte->r);
	unsigned long vsid, seg_off;
	int size, a_size, shift;
	/* Look at the 8 bit LP value */
	unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	if (!(hpte_v & HPTE_V_LARGE)) {
		size   = MMU_PAGE_4K;
		a_size = MMU_PAGE_4K;
	} else {
		for (size = 0; size < MMU_PAGE_COUNT; size++) {

			/* valid entries have a shift value */
			if (!mmu_psize_defs[size].shift)
				continue;

			a_size = __hpte_actual_psize(lp, size);
			if (a_size != -1)
				break;
		}
	}
	/* This works for all page sizes, and for 256M and 1T segments */
	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
	shift = mmu_psize_defs[size].shift;

	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
	pteg = slot / HPTES_PER_GROUP;
	if (hpte_v & HPTE_V_SECONDARY)
		pteg = ~pteg;

	switch (*ssize) {
	case MMU_SEGSIZE_256M:
		/* We only have 28 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1f) << 23;
		vsid    =  avpn >> 5;
		/* We can find more bits from the pteg value */
		if (shift < 23) {
			vpi = (vsid ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	case MMU_SEGSIZE_1T:
		/* We only have 40 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1ffff) << 23;
		vsid    = avpn >> 17;
		if (shift < 23) {
			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	default:
		*vpn = size = 0;
	}
	*psize  = size;
	*apsize = a_size;
}

/*
 * clear all mappings on kexec.  All cpus are in real mode (or they will
 * be when they isi), and we are the only one left.  We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * TODO: add batching support when enabled.  remember, no dynamic memory here,
 * although there is the control page available...
 */
static void native_hpte_clear(void)
{
	unsigned long vpn = 0;
	unsigned long slot, slots, flags;
	struct hash_pte *hptep = htab_address;
	unsigned long hpte_v;
	unsigned long pteg_count;
	int psize, apsize, ssize;

	pteg_count = htab_hash_mask + 1;

	local_irq_save(flags);

	/* we take the tlbie lock and hold it.  Some hardware will
	 * deadlock if we try to tlbie from two processors at once.
	 */
	raw_spin_lock(&native_tlbie_lock);

	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running,  right?  and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
		hpte_v = be64_to_cpu(hptep->v);

		/*
		 * Call __tlbie() here rather than tlbie() since we
		 * already hold the native_tlbie_lock.
		 */
		if (hpte_v & HPTE_V_VALID) {
			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
			hptep->v = 0;
			__tlbie(vpn, psize, apsize, ssize);
		}
	}

	asm volatile("eieio; tlbsync; ptesync":::"memory");
	raw_spin_unlock(&native_tlbie_lock);
	local_irq_restore(flags);
}

/*
 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
 * the lock all the time
 */
static void native_flush_hash_range(unsigned long number, int local)
{
	unsigned long vpn;
	unsigned long hash, index, hidx, shift, slot;
	struct hash_pte *hptep;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
	unsigned long psize = batch->psize;
	int ssize = batch->ssize;
	int i;

	local_irq_save(flags);

	for (i = 0; i < number; i++) {
		vpn = batch->vpn[i];
		pte = batch->pte[i];

		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			hash = hpt_hash(vpn, shift, ssize);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
			want_v = hpte_encode_avpn(vpn, psize, ssize);
			native_lock_hpte(hptep);
			hpte_v = be64_to_cpu(hptep->v);
			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
			    !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else
				hptep->v = 0;
		} pte_iterate_hashed_end();
	}

	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
	    mmu_psize_defs[psize].tlbiel && local) {
		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbiel(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("ptesync":::"memory");
	} else {
		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

		if (lock_tlbie)
			raw_spin_lock(&native_tlbie_lock);

		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbie(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("eieio; tlbsync; ptesync":::"memory");

		if (lock_tlbie)
			raw_spin_unlock(&native_tlbie_lock);
	}

	local_irq_restore(flags);
}

void __init hpte_init_native(void)
{
	ppc_md.hpte_invalidate	= native_hpte_invalidate;
	ppc_md.hpte_updatepp	= native_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
	ppc_md.hpte_insert	= native_hpte_insert;
	ppc_md.hpte_remove	= native_hpte_remove;
	ppc_md.hpte_clear_all	= native_hpte_clear;
	ppc_md.flush_hash_range = native_flush_hash_range;
	ppc_md.hugepage_invalidate   = native_hugepage_invalidate;
}