/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>

#include <misc/cxl-base.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#ifdef __BIG_ENDIAN__
#define HPTE_LOCK_BIT 3
#else
#define HPTE_LOCK_BIT (56+3)
#endif

DEFINE_RAW_SPINLOCK(native_tlbie_lock);

/*
 * Issue a global "tlbie" for @vpn, building the VA operand the hardware
 * expects for the given base/actual page size and segment size.
 *
 * @vpn:    virtual page number (virtual address >> VPN_SHIFT)
 * @psize:  base page size of the mapping (MMU_PAGE_*)
 * @apsize: actual page size backing the mapping
 * @ssize:  segment size (MMU_SEGSIZE_*)
 *
 * Callers provide the surrounding ptesync/tlbsync ordering and, where the
 * CPU needs it, serialisation via native_tlbie_lock (see tlbie()).
 */
static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/*
	 * We need 14 to 65 bits of va for a tlbie of 4K page
	 * With vpn we ignore the lower VPN_SHIFT bits already.
	 * And top two bits are already ignored because we can
	 * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
	 * of 12.
	 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		/* Fold the SLB L/LP encoding for the actual page size in. */
		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
		va |= sllp << 5;
		/* Newer CPUs (ARCH_206) take the extended PPC_TLBIE form. */
		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/*
		 * AVAL bits:
		 * We don't need all the bits, but rest of the bits
		 * must be ignored by the processor.
		 * vpn cover up to 65 bits of va. (0...65) and we need
		 * 58..64 bits of va.
		 */
		va |= (vpn & 0xfe); /* AVAL */
		va |= 1; /* L */
		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	}
}

/*
 * CPU-local TLB invalidate for @vpn: same VA construction as __tlbie()
 * but only affects the executing CPU. The tlbiel instruction is emitted
 * as a raw opcode (.long 0x7c000224) — presumably to keep building with
 * toolchains that don't know the mnemonic; TODO confirm.
 *
 * Parameters match __tlbie(); callers supply the ptesync ordering.
 */
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/* VPN_SHIFT can be at most 12 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64 bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after(52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		/* Fold the SLB L/LP encoding for the actual page size in. */
		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
		va |= sllp << 5;
		/* tlbiel with L=0 (bit 21 clear) for 4K pages */
		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
			     : : "r"(va) : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/*
		 * AVAL bits:
		 * We don't need all the bits, but rest of the bits
		 * must be ignored by the processor.
		 * vpn cover up to 65 bits of va. (0...65) and we need
		 * 58..64 bits of va.
		 */
		va |= (vpn & 0xfe);
		va |= 1; /* L */
		/* tlbiel with L=1 (bit 21 set) for large pages */
		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
			     : : "r"(va) : "memory");
		break;
	}

}

151 152
static inline void tlbie(unsigned long vpn, int psize, int apsize,
			 int ssize, int local)
153
{
I
Ian Munsie 已提交
154
	unsigned int use_local;
155
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
156

I
Ian Munsie 已提交
157 158
	use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use();

159 160 161
	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
162
		raw_spin_lock(&native_tlbie_lock);
163 164
	asm volatile("ptesync": : :"memory");
	if (use_local) {
165
		__tlbiel(vpn, psize, apsize, ssize);
166 167
		asm volatile("ptesync": : :"memory");
	} else {
168
		__tlbie(vpn, psize, apsize, ssize);
169 170 171
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
172
		raw_spin_unlock(&native_tlbie_lock);
173 174
}

175
static inline void native_lock_hpte(struct hash_pte *hptep)
L
Linus Torvalds 已提交
176
{
177
	unsigned long *word = (unsigned long *)&hptep->v;
L
Linus Torvalds 已提交
178 179

	while (1) {
180
		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
L
Linus Torvalds 已提交
181 182 183 184 185 186
			break;
		while(test_bit(HPTE_LOCK_BIT, word))
			cpu_relax();
	}
}

187
static inline void native_unlock_hpte(struct hash_pte *hptep)
L
Linus Torvalds 已提交
188
{
189
	unsigned long *word = (unsigned long *)&hptep->v;
L
Linus Torvalds 已提交
190

191
	clear_bit_unlock(HPTE_LOCK_BIT, word);
L
Linus Torvalds 已提交
192 193
}

/*
 * Insert a new hash PTE into the group starting at index @hpte_group.
 *
 * Scans the 8 slots of the group for an invalid (free) entry, claims it
 * under the per-HPTE lock, then writes the second dword before the
 * first so the valid bit only appears once the entry is complete.
 *
 * Returns the slot index within the group (0..7), with bit 3 set when
 * @vflags has HPTE_V_SECONDARY, or -1 if the group is full.
 */
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
			unsigned long pa, unsigned long rflags,
			unsigned long vflags, int psize, int apsize, int ssize)
{
	struct hash_pte *hptep = htab_address + hpte_group;
	unsigned long hpte_v, hpte_r;
	int i;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW("    insert(group=%lx, vpn=%016lx, pa=%016lx,"
			" rflags=%lx, vflags=%lx, psize=%d)\n",
			hpte_group, vpn, pa, rflags, vflags, psize);
	}

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			if (! (be64_to_cpu(hptep->v) & HPTE_V_VALID))
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}

	hptep->r = cpu_to_be64(hpte_r);
	/* Guarantee the second dword is visible before the valid bit */
	eieio();
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
	hptep->v = cpu_to_be64(hpte_v);

	__asm__ __volatile__ ("ptesync" : : : "memory");

	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

static long native_hpte_remove(unsigned long hpte_group)
{
247
	struct hash_pte *hptep;
L
Linus Torvalds 已提交
248 249
	int i;
	int slot_offset;
250
	unsigned long hpte_v;
L
Linus Torvalds 已提交
251

252 253
	DBG_LOW("    remove(group=%lx)\n", hpte_group);

L
Linus Torvalds 已提交
254 255 256 257 258
	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
259
		hpte_v = be64_to_cpu(hptep->v);
L
Linus Torvalds 已提交
260

261
		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
L
Linus Torvalds 已提交
262 263
			/* retry with lock held */
			native_lock_hpte(hptep);
264
			hpte_v = be64_to_cpu(hptep->v);
265 266
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
L
Linus Torvalds 已提交
267 268 269 270 271 272 273 274 275 276 277 278
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Invalidate the hpte. NOTE: this also unlocks it */
279
	hptep->v = 0;
L
Linus Torvalds 已提交
280 281 282 283

	return i;
}

/*
 * Update the protection bits of the HPTE at @slot, provided it still
 * maps @vpn and is valid.
 *
 * @newpp carries the new PPP/N (and C) bits. @flags may contain
 * HPTE_LOCAL_UPDATE (a local-only TLB flush suffices) and/or
 * HPTE_NOHPTE_UPDATE (skip the TLB flush entirely).
 *
 * Returns 0 on success, -1 when the slot no longer matches @vpn.
 * Note the TLB is still flushed on a miss — see the comment below.
 */
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
				 unsigned long vpn, int bpsize,
				 int apsize, int ssize, unsigned long flags)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v, want_v;
	int ret = 0, local = 0;

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);

	DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
		vpn, want_v & HPTE_V_AVPN, slot, newpp);

	hpte_v = be64_to_cpu(hptep->v);
	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
	 * random entry from it. When we do that we don't invalidate the TLB
	 * (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
		DBG_LOW(" -> miss\n");
		ret = -1;
	} else {
		native_lock_hpte(hptep);
		/* recheck with locks held */
		hpte_v = be64_to_cpu(hptep->v);
		if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
			     !(hpte_v & HPTE_V_VALID))) {
			ret = -1;
		} else {
			DBG_LOW(" -> hit\n");
			/* Update the HPTE: replace PPP/N, merge in C */
			hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
						~(HPTE_R_PPP | HPTE_R_N)) |
					       (newpp & (HPTE_R_PPP | HPTE_R_N |
							 HPTE_R_C)));
		}
		native_unlock_hpte(hptep);
	}

	if (flags & HPTE_LOCAL_UPDATE)
		local = 1;
	/*
	 * Ensure it is out of the tlb too if it is not a nohpte fault
	 */
	if (!(flags & HPTE_NOHPTE_UPDATE))
		tlbie(vpn, bpsize, apsize, ssize, local);

	return ret;
}

337
static long native_hpte_find(unsigned long vpn, int psize, int ssize)
L
Linus Torvalds 已提交
338
{
339
	struct hash_pte *hptep;
L
Linus Torvalds 已提交
340
	unsigned long hash;
P
Paul Mackerras 已提交
341
	unsigned long i;
L
Linus Torvalds 已提交
342
	long slot;
343
	unsigned long want_v, hpte_v;
L
Linus Torvalds 已提交
344

345
	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
346
	want_v = hpte_encode_avpn(vpn, psize, ssize);
L
Linus Torvalds 已提交
347

P
Paul Mackerras 已提交
348 349 350 351
	/* Bolted mappings are only ever in the primary group */
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + slot;
352
		hpte_v = be64_to_cpu(hptep->v);
L
Linus Torvalds 已提交
353

P
Paul Mackerras 已提交
354 355 356 357
		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* HPTE matches */
			return slot;
		++slot;
L
Linus Torvalds 已提交
358 359 360 361 362 363 364 365 366 367 368 369
	}

	return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
370
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
P
Paul Mackerras 已提交
371
				       int psize, int ssize)
L
Linus Torvalds 已提交
372
{
373 374
	unsigned long vpn;
	unsigned long vsid;
L
Linus Torvalds 已提交
375
	long slot;
376
	struct hash_pte *hptep;
L
Linus Torvalds 已提交
377

P
Paul Mackerras 已提交
378
	vsid = get_kernel_vsid(ea, ssize);
379
	vpn = hpt_vpn(ea, vsid, ssize);
L
Linus Torvalds 已提交
380

381
	slot = native_hpte_find(vpn, psize, ssize);
L
Linus Torvalds 已提交
382 383 384 385
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;

386
	/* Update the HPTE */
387
	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
388 389
				~(HPTE_R_PPP | HPTE_R_N)) |
			       (newpp & (HPTE_R_PPP | HPTE_R_N)));
390 391 392 393 394
	/*
	 * Ensure it is out of the tlb too. Bolted entries base and
	 * actual page size will be same.
	 */
	tlbie(vpn, psize, psize, ssize, 0);
L
Linus Torvalds 已提交
395 396
}

/*
 * Invalidate the HPTE at @slot if it still maps @vpn, then flush the
 * translation from the TLB. Runs with interrupts disabled so the
 * lock/invalidate/flush sequence is not re-entered on this CPU.
 * The TLB flush happens even on a mismatch — see the comment below.
 */
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
				   int bpsize, int apsize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;

	local_irq_save(flags);

	DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
	native_lock_hpte(hptep);
	hpte_v = be64_to_cpu(hptep->v);

	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
	 * random entry from it. When we do that we don't invalidate the TLB
	 * (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
		native_unlock_hpte(hptep);
	else
		/* Invalidate the hpte. NOTE: this also unlocks it */
		hptep->v = 0;

	/* Invalidate the TLB */
	tlbie(vpn, bpsize, apsize, ssize, local);

	local_irq_restore(flags);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Invalidate every HPTE backing one hugepage (PMD-mapped) region.
 *
 * @hpte_slot_array records, per @psize-sized subpage, whether an HPTE
 * was inserted and which hash slot it went to; each valid entry is
 * located, invalidated under the per-HPTE lock, and flushed from the
 * TLB. Runs with interrupts disabled for the duration of the walk.
 * NOTE: actual_psize is hard-coded to MMU_PAGE_16M for the tlbie.
 */
static void native_hugepage_invalidate(unsigned long vsid,
				       unsigned long addr,
				       unsigned char *hpte_slot_array,
				       int psize, int ssize, int local)
{
	int i;
	struct hash_pte *hptep;
	int actual_psize = MMU_PAGE_16M;
	unsigned int max_hpte_count, valid;
	unsigned long flags, s_addr = addr;
	unsigned long hpte_v, want_v, shift;
	unsigned long hidx, vpn = 0, hash, slot;

	shift = mmu_psize_defs[psize].shift;
	/* number of base-size subpages covered by one PMD mapping */
	max_hpte_count = 1U << (PMD_SHIFT - shift);

	local_irq_save(flags);
	for (i = 0; i < max_hpte_count; i++) {
		valid = hpte_valid(hpte_slot_array, i);
		if (!valid)
			continue;
		hidx =  hpte_hash_index(hpte_slot_array, i);

		/* get the vpn */
		addr = s_addr + (i * (1ul << shift));
		vpn = hpt_vpn(addr, vsid, ssize);
		hash = hpt_hash(vpn, shift, ssize);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;

		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;

		hptep = htab_address + slot;
		want_v = hpte_encode_avpn(vpn, psize, ssize);
		native_lock_hpte(hptep);
		hpte_v = be64_to_cpu(hptep->v);

		/* Even if we miss, we need to invalidate the TLB */
		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
			native_unlock_hpte(hptep);
		else
			/* Invalidate the hpte. NOTE: this also unlocks it */
			hptep->v = 0;
		/*
		 * We need to do tlb invalidate for all the address, tlbie
		 * instruction compares entry_VA in tlb with the VA specified
		 * here
		 */
		tlbie(vpn, psize, actual_psize, ssize, local);
	}
	local_irq_restore(flags);
}
#else
/* Stub: reaching this without THP configured is a kernel bug. */
static void native_hugepage_invalidate(unsigned long vsid,
				       unsigned long addr,
				       unsigned char *hpte_slot_array,
				       int psize, int ssize, int local)
{
	WARN(1, "%s called without THP support\n", __func__);
}
#endif
495

496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525
static inline int __hpte_actual_psize(unsigned int lp, int psize)
{
	int i, shift;
	unsigned int mask;

	/* start from 1 ignoring MMU_PAGE_4K */
	for (i = 1; i < MMU_PAGE_COUNT; i++) {

		/* invalid penc */
		if (mmu_psize_defs[psize].penc[i] == -1)
			continue;
		/*
		 * encoding bits per actual page size
		 *        PTE LP     actual page size
		 *    rrrr rrrz		>=8KB
		 *    rrrr rrzz		>=16KB
		 *    rrrr rzzz		>=32KB
		 *    rrrr zzzz		>=64KB
		 * .......
		 */
		shift = mmu_psize_defs[i].shift - LP_SHIFT;
		if (shift > LP_BITS)
			shift = LP_BITS;
		mask = (1 << shift) - 1;
		if ((lp & mask) == mmu_psize_defs[psize].penc[i])
			return i;
	}
	return -1;
}

/*
 * Reverse-engineer an HPTE back into its virtual page number and page/
 * segment sizes. Used by native_hpte_clear(), which must derive the
 * VPN from the entry itself in order to flush the TLB.
 *
 * Outputs: *psize (base size), *apsize (actual size), *ssize (segment
 * size), *vpn. On an unrecognised segment size, *vpn is set to 0.
 */
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
			int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
	unsigned long avpn, pteg, vpi;
	unsigned long hpte_v = be64_to_cpu(hpte->v);
	unsigned long hpte_r = be64_to_cpu(hpte->r);
	unsigned long vsid, seg_off;
	int size, a_size, shift;
	/* Look at the 8 bit LP value */
	unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	if (!(hpte_v & HPTE_V_LARGE)) {
		size   = MMU_PAGE_4K;
		a_size = MMU_PAGE_4K;
	} else {
		/* try each supported base size until the LP field decodes */
		for (size = 0; size < MMU_PAGE_COUNT; size++) {

			/* valid entries have a shift value */
			if (!mmu_psize_defs[size].shift)
				continue;

			a_size = __hpte_actual_psize(lp, size);
			if (a_size != -1)
				break;
		}
	}
	/* This works for all page sizes, and for 256M and 1T segments */
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		*ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT;
	else
		*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;

	shift = mmu_psize_defs[size].shift;

	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
	/* recover the hash-group index; secondary entries store ~pteg */
	pteg = slot / HPTES_PER_GROUP;
	if (hpte_v & HPTE_V_SECONDARY)
		pteg = ~pteg;

	switch (*ssize) {
	case MMU_SEGSIZE_256M:
		/* We only have 28 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1f) << 23;
		vsid    =  avpn >> 5;
		/* We can find more bits from the pteg value */
		if (shift < 23) {
			vpi = (vsid ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	case MMU_SEGSIZE_1T:
		/* We only have 40 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1ffff) << 23;
		vsid    = avpn >> 17;
		if (shift < 23) {
			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	default:
		*vpn = size = 0;
	}
	*psize  = size;
	*apsize = a_size;
}

/*
 * clear all mappings on kexec.  All cpus are in real mode (or they will
 * be when they isi), and we are the only one left.  We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * This must be called with interrupts disabled.
 *
 * Taking the native_tlbie_lock is unsafe here due to the possibility of
 * lockdep being on. On pre POWER5 hardware, not taking the lock could
 * cause deadlock. POWER5 and newer not taking the lock is fine. This only
 * gets called during boot before secondary CPUs have come up and during
 * crashdump and all bets are off anyway.
 *
 * TODO: add batching support when enabled.  remember, no dynamic memory here,
 * although there is the control page available...
 */
static void native_hpte_clear(void)
{
	unsigned long vpn = 0;
	unsigned long slot, slots;
	struct hash_pte *hptep = htab_address;
	unsigned long hpte_v;
	unsigned long pteg_count;
	int psize, apsize, ssize;

	pteg_count = htab_hash_mask + 1;

	/* walk every slot of every group in the hash table */
	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running,  right?  and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
		hpte_v = be64_to_cpu(hptep->v);

		/*
		 * Call __tlbie() here rather than tlbie() since we can't take the
		 * native_tlbie_lock.
		 */
		if (hpte_v & HPTE_V_VALID) {
			/* derive vpn/sizes from the entry before wiping it */
			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
			hptep->v = 0;
			__tlbie(vpn, psize, apsize, ssize);
		}
	}

	/* one barrier sequence covers the whole batch of __tlbie()s */
	asm volatile("eieio; tlbsync; ptesync":::"memory");
}

/*
 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
 * the lock all the time
 *
 * Invalidates the HPTEs for the first @number entries of this CPU's
 * ppc64_tlb_batch, then flushes them from the TLB either with per-CPU
 * tlbiel (when the MMU supports it, the page size allows it, and
 * @local is set) or with a single locked run of global tlbie's.
 */
static void native_flush_hash_range(unsigned long number, int local)
{
	unsigned long vpn;
	unsigned long hash, index, hidx, shift, slot;
	struct hash_pte *hptep;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
	unsigned long psize = batch->psize;
	int ssize = batch->ssize;
	int i;

	local_irq_save(flags);

	/* Pass 1: clear the valid bit on every matching HPTE. */
	for (i = 0; i < number; i++) {
		vpn = batch->vpn[i];
		pte = batch->pte[i];

		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			hash = hpt_hash(vpn, shift, ssize);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
			want_v = hpte_encode_avpn(vpn, psize, ssize);
			native_lock_hpte(hptep);
			hpte_v = be64_to_cpu(hptep->v);
			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
			    !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else
				hptep->v = 0;
		} pte_iterate_hashed_end();
	}

	/* Pass 2: flush the TLB, locally if possible, else globally. */
	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
	    mmu_psize_defs[psize].tlbiel && local) {
		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbiel(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("ptesync":::"memory");
	} else {
		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

		if (lock_tlbie)
			raw_spin_lock(&native_tlbie_lock);

		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbie(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		/* one barrier sequence covers the whole tlbie batch */
		asm volatile("eieio; tlbsync; ptesync":::"memory");

		if (lock_tlbie)
			raw_spin_unlock(&native_tlbie_lock);
	}

	local_irq_restore(flags);
}

/*
 * Store @patb1 (big-endian) into the second dword of the partition
 * table entry. Always returns 0.
 */
static int native_update_partition_table(u64 patb1)
{
	partition_tb->patb1 = cpu_to_be64(patb1);
	return 0;
}

732
void __init hpte_init_native(void)
L
Linus Torvalds 已提交
733 734 735 736 737
{
	ppc_md.hpte_invalidate	= native_hpte_invalidate;
	ppc_md.hpte_updatepp	= native_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
	ppc_md.hpte_insert	= native_hpte_insert;
738 739
	ppc_md.hpte_remove	= native_hpte_remove;
	ppc_md.hpte_clear_all	= native_hpte_clear;
740
	ppc_md.flush_hash_range = native_flush_hash_range;
741
	ppc_md.hugepage_invalidate   = native_hugepage_invalidate;
742 743 744

	if (cpu_has_feature(CPU_FTR_ARCH_300))
		ppc_md.update_partition_table = native_update_partition_table;
L
Linus Torvalds 已提交
745
}