/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#ifdef __BIG_ENDIAN__
#define HPTE_LOCK_BIT 3
#else
#define HPTE_LOCK_BIT (56+3)
#endif

DEFINE_RAW_SPINLOCK(native_tlbie_lock);

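/*
 * Issue a global tlbie for one virtual page number.  The base page size
 * (psize) selects the 4K vs. large-page form of the instruction, while
 * the actual page size (apsize) supplies the SLB L/LP bits or the penc
 * value that get encoded into the effective address.  Callers provide
 * the surrounding ptesync/eieio/tlbsync sequence and any serialization
 * the hardware needs (see tlbie() and native_tlbie_lock).
 */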
static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/*
	 * We need bits 14 to 65 of the va for a tlbie of a 4K page.
	 * With vpn we already ignore the lower VPN_SHIFT bits.
	 * The top two bits are also ignored because we can only
	 * accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
	 * of 12.
	 */
	va = vpn << VPN_SHIFT;
	/*
	 * Clear the top 16 bits of the 64-bit va, non SLS segment.
	 * Older versions of the architecture (2.02 and earlier) require
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
		va |= sllp << 5;
		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/*
		 * AVAL bits:
		 * We don't need all the bits, but the rest of the bits
		 * must be ignored by the processor.
		 * vpn covers up to 65 bits of va (0...65) and we need
		 * bits 58..64 of the va.
		 */
		va |= (vpn & 0xfe); /* AVAL */
		va |= 1; /* L */
		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	}
}

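/*
 * Local form of __tlbie(): the va is encoded the same way, but the
 * tlbiel opcode is emitted directly, so only the current CPU's TLB is
 * flushed.  Callers provide the surrounding ptesync barriers.
 */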
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/* VPN_SHIFT can be at most 12 */
	va = vpn << VPN_SHIFT;
	/*
	 * Clear the top 16 bits of the 64-bit va, non SLS segment.
	 * Older versions of the architecture (2.02 and earlier) require
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
		va |= sllp << 5;
		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
			     : : "r"(va) : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/*
		 * AVAL bits:
		 * We don't need all the bits, but the rest of the bits
		 * must be ignored by the processor.
		 * vpn covers up to 65 bits of va (0...65) and we need
		 * bits 58..64 of the va.
		 */
		va |= (vpn & 0xfe);
		va |= 1; /* L */
		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
			     : : "r"(va) : "memory");
		break;
	}

}

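/*
 * Flush one translation, choosing between the local and global forms.
 * A local flush is used only when the caller asks for it, the CPU has
 * MMU_FTR_TLBIEL and the page size supports tlbiel; otherwise a global
 * tlbie is issued, serialized by native_tlbie_lock on hardware without
 * MMU_FTR_LOCKLESS_TLBIE.
 */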
static inline void tlbie(unsigned long vpn, int psize, int apsize,
			 int ssize, int local)
{
	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
		raw_spin_lock(&native_tlbie_lock);
	asm volatile("ptesync": : :"memory");
	if (use_local) {
		__tlbiel(vpn, psize, apsize, ssize);
		asm volatile("ptesync": : :"memory");
	} else {
		__tlbie(vpn, psize, apsize, ssize);
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
		raw_spin_unlock(&native_tlbie_lock);
}

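/*
 * Per-HPTE locking uses a software bit (HPTE_LOCK_BIT) in the first
 * doubleword of the entry: lock by test-and-set with a spin loop,
 * unlock by clearing the bit.  Writing a new value to hptep->v (as the
 * insert, remove and invalidate paths do) also drops the lock.
 */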
static inline void native_lock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = (unsigned long *)&hptep->v;

	while (1) {
		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
			break;
		while (test_bit(HPTE_LOCK_BIT, word))
			cpu_relax();
	}
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = (unsigned long *)&hptep->v;

	clear_bit_unlock(HPTE_LOCK_BIT, word);
}

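/*
 * Insert an HPTE into the given hash group.  Scans the eight slots for
 * an invalid entry, takes the per-HPTE lock, writes the second
 * doubleword first, then the first doubleword whose valid bit also
 * releases the lock.  Returns the slot number within the group, with
 * bit 3 set for the secondary hash, or -1 if the group is full.
 */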
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
			unsigned long pa, unsigned long rflags,
			unsigned long vflags, int psize, int apsize, int ssize)
{
	struct hash_pte *hptep = htab_address + hpte_group;
	unsigned long hpte_v, hpte_r;
	int i;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW("    insert(group=%lx, vpn=%016lx, pa=%016lx,"
			" rflags=%lx, vflags=%lx, psize=%d)\n",
			hpte_group, vpn, pa, rflags, vflags, psize);
	}

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID))
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}

	hptep->r = cpu_to_be64(hpte_r);
	/* Guarantee the second dword is visible before the valid bit */
	eieio();
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
	hptep->v = cpu_to_be64(hpte_v);

	__asm__ __volatile__ ("ptesync" : : : "memory");

	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

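/*
 * Evict one non-bolted entry from a hash group so a new one can be
 * inserted.  The search starts at a pseudo-random offset derived from
 * the timebase; the victim's valid word is cleared and its slot index
 * returned, or -1 if no suitable victim is found.  Note that no TLB
 * invalidation is done here (see the comments in
 * native_hpte_updatepp()).
 */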
static long native_hpte_remove(unsigned long hpte_group)
{
	struct hash_pte *hptep;
	int i;
	int slot_offset;
	unsigned long hpte_v;

	DBG_LOW("    remove(group=%lx)\n", hpte_group);

	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
		hpte_v = be64_to_cpu(hptep->v);

		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			hpte_v = be64_to_cpu(hptep->v);
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Invalidate the hpte. NOTE: this also unlocks it */
	hptep->v = 0;

	return i;
}

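/*
 * Update the protection bits of the HPTE at @slot if it still maps
 * @vpn.  Returns 0 on a hit and -1 on a miss; in either case the
 * translation is flushed from the TLB, because a miss can mean the
 * entry was previously evicted by native_hpte_remove() without a TLB
 * invalidation.
 */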
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
				 unsigned long vpn, int bpsize,
				 int apsize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v, want_v;
	int ret = 0;

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);

	DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
		vpn, want_v & HPTE_V_AVPN, slot, newpp);

	native_lock_hpte(hptep);

	hpte_v = be64_to_cpu(hptep->v);
	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more or
	 * less random entry from it. When we do that we don't invalidate the
	 * TLB (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
		DBG_LOW(" -> miss\n");
		ret = -1;
	} else {
		DBG_LOW(" -> hit\n");
		/* Update the HPTE */
		hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) |
			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)));
	}
	native_unlock_hpte(hptep);

	/* Ensure it is out of the tlb too. */
	tlbie(vpn, bpsize, apsize, ssize, local);

	return ret;
}

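/*
 * Find the slot of the HPTE mapping @vpn.  Only the primary hash group
 * is searched, because this is used for bolted mappings, which are only
 * ever inserted there.  Returns the slot number, or -1 if no valid
 * matching entry exists.
 */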
static long native_hpte_find(unsigned long vpn, int psize, int ssize)
{
	struct hash_pte *hptep;
	unsigned long hash;
	unsigned long i;
	long slot;
	unsigned long want_v, hpte_v;

	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
	want_v = hpte_encode_avpn(vpn, psize, ssize);

	/* Bolted mappings are only ever in the primary group */
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + slot;
		hpte_v = be64_to_cpu(hptep->v);

		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* HPTE matches */
			return slot;
		++slot;
	}

	return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
				       int psize, int ssize)
{
	unsigned long vpn;
	unsigned long vsid;
	long slot;
	struct hash_pte *hptep;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = native_hpte_find(vpn, psize, ssize);
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;

	/* Update the HPTE */
	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
			~(HPTE_R_PP | HPTE_R_N)) |
		(newpp & (HPTE_R_PP | HPTE_R_N)));
	/*
	 * Ensure it is out of the tlb too. For bolted entries the base
	 * and actual page size are the same.
	 */
	tlbie(vpn, psize, psize, ssize, 0);
}

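/*
 * Invalidate the HPTE at @slot if it still maps @vpn, then flush the
 * translation from the TLB.  The TLB flush is done even when the entry
 * no longer matches, for the hpte_remove() eviction reason explained in
 * the comment below.  Runs with interrupts disabled.
 */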
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
				   int bpsize, int apsize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;

	local_irq_save(flags);

	DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
	native_lock_hpte(hptep);
	hpte_v = be64_to_cpu(hptep->v);

	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more or
	 * less random entry from it. When we do that we don't invalidate the
	 * TLB (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
		native_unlock_hpte(hptep);
	else
		/* Invalidate the hpte. NOTE: this also unlocks it */
		hptep->v = 0;

	/* Invalidate the TLB */
	tlbie(vpn, bpsize, apsize, ssize, local);

	local_irq_restore(flags);
}

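/*
 * Invalidate all the HPTEs backing one hugepage PMD mapping.  The slot
 * array records, for each base-page-size subpage, whether an HPTE was
 * inserted and into which slot; every valid entry is cleared under its
 * HPTE lock, and a single global tlbie (16M actual page size) flushes
 * the translation at the end.
 */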
static void native_hugepage_invalidate(struct mm_struct *mm,
				       unsigned char *hpte_slot_array,
				       unsigned long addr, int psize)
{
	int ssize = 0, i;
	int lock_tlbie;
	struct hash_pte *hptep;
	int actual_psize = MMU_PAGE_16M;
	unsigned int max_hpte_count, valid;
	unsigned long flags, s_addr = addr;
	unsigned long hpte_v, want_v, shift;
	unsigned long hidx, vpn = 0, vsid, hash, slot;

	shift = mmu_psize_defs[psize].shift;
	max_hpte_count = 1U << (PMD_SHIFT - shift);

	local_irq_save(flags);
	for (i = 0; i < max_hpte_count; i++) {
		valid = hpte_valid(hpte_slot_array, i);
		if (!valid)
			continue;
		hidx =  hpte_hash_index(hpte_slot_array, i);

		/* get the vpn */
		addr = s_addr + (i * (1ul << shift));
		if (!is_kernel_addr(addr)) {
			ssize = user_segment_size(addr);
			vsid = get_vsid(mm->context.id, addr, ssize);
			WARN_ON(vsid == 0);
		} else {
			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
			ssize = mmu_kernel_ssize;
		}

		vpn = hpt_vpn(addr, vsid, ssize);
		hash = hpt_hash(vpn, shift, ssize);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;

		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;

		hptep = htab_address + slot;
		want_v = hpte_encode_avpn(vpn, psize, ssize);
		native_lock_hpte(hptep);
		hpte_v = be64_to_cpu(hptep->v);

		/* Even if we miss, we need to invalidate the TLB */
		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
			native_unlock_hpte(hptep);
		else
			/* Invalidate the hpte. NOTE: this also unlocks it */
			hptep->v = 0;
	}
	/*
	 * Since this is a hugepage, we just need a single tlbie.
	 * Use the last vpn.
	 */
	lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
	if (lock_tlbie)
		raw_spin_lock(&native_tlbie_lock);

	asm volatile("ptesync":::"memory");
	__tlbie(vpn, psize, actual_psize, ssize);
	asm volatile("eieio; tlbsync; ptesync":::"memory");

	if (lock_tlbie)
		raw_spin_unlock(&native_tlbie_lock);

	local_irq_restore(flags);
}

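/*
 * Given the 8-bit LP field of an HPTE and a candidate base page size,
 * work out which actual page size the LP encoding corresponds to.
 * Returns an MMU_PAGE_* index, or -1 if the LP value matches none of
 * the penc encodings for that base size.
 */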
static inline int __hpte_actual_psize(unsigned int lp, int psize)
{
	int i, shift;
	unsigned int mask;

	/* start from 1 ignoring MMU_PAGE_4K */
	for (i = 1; i < MMU_PAGE_COUNT; i++) {

		/* invalid penc */
		if (mmu_psize_defs[psize].penc[i] == -1)
			continue;
		/*
		 * encoding bits per actual page size
		 *        PTE LP     actual page size
		 *    rrrr rrrz		>=8KB
		 *    rrrr rrzz		>=16KB
		 *    rrrr rzzz		>=32KB
		 *    rrrr zzzz		>=64KB
		 * .......
		 */
		shift = mmu_psize_defs[i].shift - LP_SHIFT;
		if (shift > LP_BITS)
			shift = LP_BITS;
		mask = (1 << shift) - 1;
		if ((lp & mask) == mmu_psize_defs[psize].penc[i])
			return i;
	}
	return -1;
}

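/*
 * Reconstruct the base page size, actual page size, segment size and
 * virtual page number from a raw HPTE and its slot.  Used by
 * native_hpte_clear(), which has nothing but the hardware entry to work
 * out what to tlbie.
 */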
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
			int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
	unsigned long avpn, pteg, vpi;
	unsigned long hpte_v = be64_to_cpu(hpte->v);
	unsigned long hpte_r = be64_to_cpu(hpte->r);
	unsigned long vsid, seg_off;
	int size, a_size, shift;
	/* Look at the 8 bit LP value */
	unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	if (!(hpte_v & HPTE_V_LARGE)) {
		size   = MMU_PAGE_4K;
		a_size = MMU_PAGE_4K;
	} else {
		for (size = 0; size < MMU_PAGE_COUNT; size++) {

			/* valid entries have a shift value */
			if (!mmu_psize_defs[size].shift)
				continue;

			a_size = __hpte_actual_psize(lp, size);
			if (a_size != -1)
				break;
		}
	}
	/* This works for all page sizes, and for 256M and 1T segments */
	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
	shift = mmu_psize_defs[size].shift;

	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
	pteg = slot / HPTES_PER_GROUP;
	if (hpte_v & HPTE_V_SECONDARY)
		pteg = ~pteg;

	switch (*ssize) {
	case MMU_SEGSIZE_256M:
		/* We only have 28 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1f) << 23;
		vsid    =  avpn >> 5;
		/* We can find more bits from the pteg value */
		if (shift < 23) {
			vpi = (vsid ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	case MMU_SEGSIZE_1T:
		/* We only have 40 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1ffff) << 23;
		vsid    = avpn >> 17;
		if (shift < 23) {
			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	default:
		*vpn = size = 0;
	}
	*psize  = size;
	*apsize = a_size;
}

/*
 * Clear all mappings on kexec.  All cpus are in real mode (or they will
 * be when they isi), and we are the only one left.  We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * TODO: add batching support when enabled.  Remember, no dynamic memory
 * here, although there is the control page available...
 */
static void native_hpte_clear(void)
{
	unsigned long vpn = 0;
	unsigned long slot, slots, flags;
	struct hash_pte *hptep = htab_address;
	unsigned long hpte_v;
	unsigned long pteg_count;
	int psize, apsize, ssize;

	pteg_count = htab_hash_mask + 1;

	local_irq_save(flags);

	/* we take the tlbie lock and hold it.  Some hardware will
	 * deadlock if we try to tlbie from two processors at once.
	 */
	raw_spin_lock(&native_tlbie_lock);

	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running,  right?  and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
		hpte_v = be64_to_cpu(hptep->v);

		/*
		 * Call __tlbie() here rather than tlbie() since we
		 * already hold the native_tlbie_lock.
		 */
		if (hpte_v & HPTE_V_VALID) {
			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
			hptep->v = 0;
			__tlbie(vpn, psize, apsize, ssize);
		}
	}

	asm volatile("eieio; tlbsync; ptesync":::"memory");
	raw_spin_unlock(&native_tlbie_lock);
	local_irq_restore(flags);
}

/*
 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
 * the lock all the time
 */
static void native_flush_hash_range(unsigned long number, int local)
{
	unsigned long vpn;
	unsigned long hash, index, hidx, shift, slot;
	struct hash_pte *hptep;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
	unsigned long psize = batch->psize;
	int ssize = batch->ssize;
	int i;

	local_irq_save(flags);

	for (i = 0; i < number; i++) {
		vpn = batch->vpn[i];
		pte = batch->pte[i];

		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			hash = hpt_hash(vpn, shift, ssize);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
			want_v = hpte_encode_avpn(vpn, psize, ssize);
			native_lock_hpte(hptep);
			hpte_v = be64_to_cpu(hptep->v);
			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
			    !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else
				hptep->v = 0;
		} pte_iterate_hashed_end();
	}

	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
	    mmu_psize_defs[psize].tlbiel && local) {
		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbiel(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("ptesync":::"memory");
	} else {
		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

		if (lock_tlbie)
			raw_spin_lock(&native_tlbie_lock);

		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbie(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("eieio; tlbsync; ptesync":::"memory");

		if (lock_tlbie)
			raw_spin_unlock(&native_tlbie_lock);
	}

	local_irq_restore(flags);
}

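/*
 * Hook the native (bare-metal) HPT callbacks into ppc_md so the generic
 * hash MMU code uses the implementations above instead of hypervisor
 * calls.
 */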
void __init hpte_init_native(void)
{
	ppc_md.hpte_invalidate	= native_hpte_invalidate;
	ppc_md.hpte_updatepp	= native_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
	ppc_md.hpte_insert	= native_hpte_insert;
	ppc_md.hpte_remove	= native_hpte_remove;
	ppc_md.hpte_clear_all	= native_hpte_clear;
	ppc_md.flush_hash_range = native_flush_hash_range;
	ppc_md.hugepage_invalidate   = native_hugepage_invalidate;
}