hash_native_64.c

/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#ifdef __BIG_ENDIAN__
#define HPTE_LOCK_BIT 3
#else
#define HPTE_LOCK_BIT (56+3)
#endif
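
/*
 * The two values describe the same storage bit: the HPTE is kept in
 * big-endian byte order, while test_and_set_bit_lock() numbers bits
 * within a native-endian unsigned long, so the byte that carries bit 3
 * of the big-endian doubleword supplies bits 56..63 of a little-endian
 * load and the lock bit moves to position 56 + 3.
 */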

DEFINE_RAW_SPINLOCK(native_tlbie_lock);

static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/*
	 * We need bits 14 to 65 of the va for a tlbie of a 4K page.
	 * With vpn we ignore the lower VPN_SHIFT bits already.
	 * And the top two bits are already ignored because we can
	 * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
	 * of 12.
	 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
		va |= sllp << 5;
		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/*
		 * AVAL bits:
		 * We don't need all the bits, but the rest of the bits
		 * must be ignored by the processor.
		 * vpn covers up to 65 bits of va (0...65) and we need
		 * bits 58..64 of va.
		 */
		va |= (vpn & 0xfe); /* AVAL */
		va |= 1; /* L */
		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	}
}
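
/*
 * Illustrative sketch of the default (large page) branch above, assuming
 * VPN_SHIFT == 12 and a 16M actual page (shift == 24); the value handed
 * to tlbie is built up as:
 *
 *	va  = vpn << 12;		effective address bits
 *	va &= ~((1ul << 24) - 1);	keep only the 16M-aligned part
 *	va |= penc << 12;		encoding of the actual page size
 *	va |= ssize << 8;		segment size
 *	va |= vpn & 0xfe;		low AVAL bits
 *	va |= 1;			L = 1 selects the large page form
 */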

static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/* VPN_SHIFT can be at most 12 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64 bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
		va |= sllp << 5;
		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
			     : : "r"(va) : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/*
		 * AVAL bits:
		 * We don't need all the bits, but the rest of the bits
		 * must be ignored by the processor.
		 * vpn covers up to 65 bits of va (0...65) and we need
		 * bits 58..64 of va.
		 */
		va |= (vpn & 0xfe);
		va |= 1; /* L */
		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
			     : : "r"(va) : "memory");
		break;
	}

}

static inline void tlbie(unsigned long vpn, int psize, int apsize,
			 int ssize, int local)
{
	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
		raw_spin_lock(&native_tlbie_lock);
	asm volatile("ptesync": : :"memory");
	if (use_local) {
		__tlbiel(vpn, psize, apsize, ssize);
		asm volatile("ptesync": : :"memory");
	} else {
		__tlbie(vpn, psize, apsize, ssize);
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
		raw_spin_unlock(&native_tlbie_lock);
}
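
/*
 * Usage sketch: a local flush is honoured only when the caller asked for
 * one, the CPU advertises MMU_FTR_TLBIEL and the base page size supports
 * tlbiel; anything else falls back to a global tlbie, serialised by
 * native_tlbie_lock on hardware without MMU_FTR_LOCKLESS_TLBIE.
 */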

static inline void native_lock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = (unsigned long *)&hptep->v;

	while (1) {
		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
			break;
		while(test_bit(HPTE_LOCK_BIT, word))
			cpu_relax();
	}
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = (unsigned long *)&hptep->v;

	clear_bit_unlock(HPTE_LOCK_BIT, word);
}
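
/*
 * Typical locking pattern used by the helpers below (sketch):
 *
 *	native_lock_hpte(hptep);
 *	hpte_v = be64_to_cpu(hptep->v);
 *	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
 *		native_unlock_hpte(hptep);	miss: drop the lock
 *	else
 *		hptep->v = 0;			invalidate; this store also
 *						clears the lock bit
 */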

static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
			unsigned long pa, unsigned long rflags,
			unsigned long vflags, int psize, int apsize, int ssize)
{
	struct hash_pte *hptep = htab_address + hpte_group;
	unsigned long hpte_v, hpte_r;
	int i;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW("    insert(group=%lx, vpn=%016lx, pa=%016lx,"
			" rflags=%lx, vflags=%lx, psize=%d)\n",
			hpte_group, vpn, pa, rflags, vflags, psize);
	}

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID))
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}

	hptep->r = cpu_to_be64(hpte_r);
	/* Guarantee the second dword is visible before the valid bit */
	eieio();
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
	hptep->v = cpu_to_be64(hpte_v);

	__asm__ __volatile__ ("ptesync" : : : "memory");

	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
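
/*
 * Return value sketch: the low three bits are the slot used within the
 * eight-entry group and bit 3 records whether HPTE_V_SECONDARY was set,
 * e.g. slot 5 of a secondary group is returned as (1 << 3) | 5 == 13.
 * This matches the _PTEIDX_SECONDARY/_PTEIDX_GROUP_IX split used by the
 * invalidate paths further down.
 */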

static long native_hpte_remove(unsigned long hpte_group)
{
	struct hash_pte *hptep;
	int i;
	int slot_offset;
	unsigned long hpte_v;

	DBG_LOW("    remove(group=%lx)\n", hpte_group);

	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
		hpte_v = be64_to_cpu(hptep->v);

		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			hpte_v = be64_to_cpu(hptep->v);
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Invalidate the hpte. NOTE: this also unlocks it */
	hptep->v = 0;

	return i;
}

static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
				 unsigned long vpn, int bpsize,
				 int apsize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v, want_v;
	int ret = 0;

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);

	DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
		vpn, want_v & HPTE_V_AVPN, slot, newpp);

	native_lock_hpte(hptep);

	hpte_v = be64_to_cpu(hptep->v);
	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
	 * random entry from it. When we do that we don't invalidate the TLB
	 * (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
		DBG_LOW(" -> miss\n");
		ret = -1;
	} else {
		DBG_LOW(" -> hit\n");
		/* Update the HPTE */
		hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) |
			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)));
	}
	native_unlock_hpte(hptep);

	/* Ensure it is out of the tlb too. */
	tlbie(vpn, bpsize, apsize, ssize, local);

	return ret;
}

static long native_hpte_find(unsigned long vpn, int psize, int ssize)
{
	struct hash_pte *hptep;
	unsigned long hash;
	unsigned long i;
	long slot;
	unsigned long want_v, hpte_v;

	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
	want_v = hpte_encode_avpn(vpn, psize, ssize);

	/* Bolted mappings are only ever in the primary group */
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + slot;
		hpte_v = be64_to_cpu(hptep->v);

		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* HPTE matches */
			return slot;
		++slot;
	}

	return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
				       int psize, int ssize)
{
	unsigned long vpn;
	unsigned long vsid;
	long slot;
	struct hash_pte *hptep;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = native_hpte_find(vpn, psize, ssize);
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;

	/* Update the HPTE */
	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
			~(HPTE_R_PP | HPTE_R_N)) |
		(newpp & (HPTE_R_PP | HPTE_R_N)));
	/*
	 * Ensure it is out of the tlb too. For bolted entries the base
	 * and actual page sizes are the same.
	 */
	tlbie(vpn, psize, psize, ssize, 0);
}

static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
				   int bpsize, int apsize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;

	local_irq_save(flags);

	DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
	native_lock_hpte(hptep);
	hpte_v = be64_to_cpu(hptep->v);

	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
	 * random entry from it. When we do that we don't invalidate the TLB
	 * (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
		native_unlock_hpte(hptep);
	else
		/* Invalidate the hpte. NOTE: this also unlocks it */
		hptep->v = 0;

	/* Invalidate the TLB */
	tlbie(vpn, bpsize, apsize, ssize, local);

	local_irq_restore(flags);
}

static void native_hugepage_invalidate(unsigned long vsid,
				       unsigned long addr,
				       unsigned char *hpte_slot_array,
				       int psize, int ssize)
{
	int i;
	struct hash_pte *hptep;
	int actual_psize = MMU_PAGE_16M;
	unsigned int max_hpte_count, valid;
	unsigned long flags, s_addr = addr;
	unsigned long hpte_v, want_v, shift;
	unsigned long hidx, vpn = 0, hash, slot;

	shift = mmu_psize_defs[psize].shift;
	max_hpte_count = 1U << (PMD_SHIFT - shift);

	local_irq_save(flags);
	for (i = 0; i < max_hpte_count; i++) {
		valid = hpte_valid(hpte_slot_array, i);
		if (!valid)
			continue;
		hidx =  hpte_hash_index(hpte_slot_array, i);

		/* get the vpn */
		addr = s_addr + (i * (1ul << shift));
		vpn = hpt_vpn(addr, vsid, ssize);
		hash = hpt_hash(vpn, shift, ssize);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;

		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;

		hptep = htab_address + slot;
		want_v = hpte_encode_avpn(vpn, psize, ssize);
		native_lock_hpte(hptep);
		hpte_v = be64_to_cpu(hptep->v);

		/* Even if we miss, we need to invalidate the TLB */
		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
			native_unlock_hpte(hptep);
		else
			/* Invalidate the hpte. NOTE: this also unlocks it */
			hptep->v = 0;
		/*
		 * We need to do a tlb invalidate for each address; the tlbie
		 * instruction compares the entry_VA in the tlb with the VA
		 * specified here.
		 */
		tlbie(vpn, psize, actual_psize, ssize, 0);
	}
	local_irq_restore(flags);
}

static inline int __hpte_actual_psize(unsigned int lp, int psize)
{
	int i, shift;
	unsigned int mask;

	/* start from 1 ignoring MMU_PAGE_4K */
	for (i = 1; i < MMU_PAGE_COUNT; i++) {

		/* invalid penc */
		if (mmu_psize_defs[psize].penc[i] == -1)
			continue;
		/*
		 * encoding bits per actual page size
		 *        PTE LP     actual page size
		 *    rrrr rrrz		>=8KB
		 *    rrrr rrzz		>=16KB
		 *    rrrr rzzz		>=32KB
		 *    rrrr zzzz		>=64KB
		 * .......
		 */
		shift = mmu_psize_defs[i].shift - LP_SHIFT;
		if (shift > LP_BITS)
			shift = LP_BITS;
		mask = (1 << shift) - 1;
		if ((lp & mask) == mmu_psize_defs[psize].penc[i])
			return i;
	}
	return -1;
}
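
/*
 * Worked example, assuming LP_SHIFT == 12 and LP_BITS == 8: probing a
 * 64K candidate gives shift = 16 - 12 = 4, so mask = 0xf and only the
 * low four LP bits are compared against penc[MMU_PAGE_64K]; a 16M
 * candidate gives shift = 24 - 12 = 12, clamped to 8, so the full
 * eight-bit LP field has to match its penc value.
 */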

static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
			int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
	unsigned long avpn, pteg, vpi;
	unsigned long hpte_v = be64_to_cpu(hpte->v);
	unsigned long hpte_r = be64_to_cpu(hpte->r);
	unsigned long vsid, seg_off;
	int size, a_size, shift;
	/* Look at the 8 bit LP value */
	unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	if (!(hpte_v & HPTE_V_LARGE)) {
		size   = MMU_PAGE_4K;
		a_size = MMU_PAGE_4K;
	} else {
		for (size = 0; size < MMU_PAGE_COUNT; size++) {

			/* valid entries have a shift value */
			if (!mmu_psize_defs[size].shift)
				continue;

			a_size = __hpte_actual_psize(lp, size);
			if (a_size != -1)
				break;
		}
	}
	/* This works for all page sizes, and for 256M and 1T segments */
	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
	shift = mmu_psize_defs[size].shift;

	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
	pteg = slot / HPTES_PER_GROUP;
	if (hpte_v & HPTE_V_SECONDARY)
		pteg = ~pteg;

	switch (*ssize) {
	case MMU_SEGSIZE_256M:
		/* We only have 28 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1f) << 23;
		vsid    = avpn >> 5;
		/* We can find more bits from the pteg value */
		if (shift < 23) {
			vpi = (vsid ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	case MMU_SEGSIZE_1T:
		/* We only have 40 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1ffff) << 23;
		vsid    = avpn >> 17;
		if (shift < 23) {
			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	default:
		*vpn = size = 0;
	}
	*psize  = size;
	*apsize = a_size;
}
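
/*
 * Sketch of the vpi recovery above: for a 256M segment the primary hash
 * is essentially vsid XOR (segment offset >> page shift), so once the
 * vsid is known from the AVPN the masked page index follows as
 * vpi = (vsid ^ pteg) & htab_hash_mask, supplying the offset bits below
 * bit 23 that the AVPN does not carry.
 */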

/*
 * clear all mappings on kexec.  All cpus are in real mode (or they will
 * be when they isi), and we are the only one left.  We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * TODO: add batching support when enabled.  remember, no dynamic memory here,
 * although there is the control page available...
 */
static void native_hpte_clear(void)
{
	unsigned long vpn = 0;
	unsigned long slot, slots, flags;
	struct hash_pte *hptep = htab_address;
	unsigned long hpte_v;
	unsigned long pteg_count;
	int psize, apsize, ssize;

	pteg_count = htab_hash_mask + 1;

	local_irq_save(flags);

	/* we take the tlbie lock and hold it.  Some hardware will
	 * deadlock if we try to tlbie from two processors at once.
	 */
	raw_spin_lock(&native_tlbie_lock);

	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running,  right?  and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
		hpte_v = be64_to_cpu(hptep->v);

		/*
		 * Call __tlbie() here rather than tlbie() since we
		 * already hold the native_tlbie_lock.
		 */
		if (hpte_v & HPTE_V_VALID) {
			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
			hptep->v = 0;
			__tlbie(vpn, psize, apsize, ssize);
		}
	}

	asm volatile("eieio; tlbsync; ptesync":::"memory");
	raw_spin_unlock(&native_tlbie_lock);
	local_irq_restore(flags);
}

/*
 * Batched hash table flush; we batch the tlbie's to avoid taking/releasing
 * the lock all the time.
 */
static void native_flush_hash_range(unsigned long number, int local)
{
	unsigned long vpn;
	unsigned long hash, index, hidx, shift, slot;
	struct hash_pte *hptep;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
	unsigned long psize = batch->psize;
	int ssize = batch->ssize;
	int i;

	local_irq_save(flags);

	for (i = 0; i < number; i++) {
		vpn = batch->vpn[i];
		pte = batch->pte[i];

		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			hash = hpt_hash(vpn, shift, ssize);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
			want_v = hpte_encode_avpn(vpn, psize, ssize);
			native_lock_hpte(hptep);
			hpte_v = be64_to_cpu(hptep->v);
			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
			    !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else
				hptep->v = 0;
		} pte_iterate_hashed_end();
	}

	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
	    mmu_psize_defs[psize].tlbiel && local) {
		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbiel(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("ptesync":::"memory");
	} else {
		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

		if (lock_tlbie)
			raw_spin_lock(&native_tlbie_lock);

		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbie(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("eieio; tlbsync; ptesync":::"memory");

		if (lock_tlbie)
			raw_spin_unlock(&native_tlbie_lock);
	}

	local_irq_restore(flags);
}

void __init hpte_init_native(void)
{
	ppc_md.hpte_invalidate	= native_hpte_invalidate;
	ppc_md.hpte_updatepp	= native_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
	ppc_md.hpte_insert	= native_hpte_insert;
	ppc_md.hpte_remove	= native_hpte_remove;
	ppc_md.hpte_clear_all	= native_hpte_clear;
	ppc_md.flush_hash_range = native_flush_hash_range;
	ppc_md.hugepage_invalidate   = native_hugepage_invalidate;
}