/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/abs_addr.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#define HPTE_LOCK_BIT 3

static DEFINE_SPINLOCK(native_tlbie_lock);

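/*
 * Issue a global (broadcast) tlbie for one virtual address of the
 * given page size.  For non-4K pages the page size encoding from
 * mmu_psize_defs is folded into the low bits of the VA and the
 * large-page form of the instruction is used.  Callers provide the
 * surrounding ptesync/eieio/tlbsync and any serialization.
 */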
static inline void __tlbie(unsigned long va, unsigned int psize)
{
	unsigned int penc;

	/* clear top 16 bits, non SLS segment */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		va &= ~0xffful;
		asm volatile("tlbie %0,0" : : "r" (va) : "memory");
		break;
	default:
		penc = mmu_psize_defs[psize].penc;
		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
		va |= penc << 12;
		asm volatile("tlbie %0,1" : : "r" (va) : "memory");
		break;
	}
}

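/*
 * Local variant of __tlbie: invalidates the translation on this CPU
 * only.  The tlbiel instruction is emitted as a raw opcode
 * (0x7c000224), presumably so older assemblers can still build this
 * file.
 */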
static inline void __tlbiel(unsigned long va, unsigned int psize)
{
	unsigned int penc;

	/* clear top 16 bits, non SLS segment */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		va &= ~0xffful;
		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
			     : : "r"(va) : "memory");
		break;
	default:
		penc = mmu_psize_defs[psize].penc;
		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
		va |= penc << 12;
		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
			     : : "r"(va) : "memory");
		break;
	}

}

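/*
 * Flush one translation, picking the local (tlbiel) form when the
 * caller asked for a local flush and both the CPU and the page size
 * support it, otherwise the global form.  Global flushes are
 * serialized with native_tlbie_lock on hardware without
 * CPU_FTR_LOCKLESS_TLBIE.
 */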
static inline void tlbie(unsigned long va, int psize, int local)
{
	unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
	int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);

	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
		spin_lock(&native_tlbie_lock);
	asm volatile("ptesync": : :"memory");
	if (use_local) {
		__tlbiel(va, psize);
		asm volatile("ptesync": : :"memory");
	} else {
		__tlbie(va, psize);
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
		spin_unlock(&native_tlbie_lock);
}

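/*
 * Per-HPTE lock: spin on a lock bit (HPTE_LOCK_BIT) in the first
 * doubleword of the entry so that updates to a single HPTE are
 * serialized between CPUs.
 */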
static inline void native_lock_hpte(hpte_t *hptep)
{
	unsigned long *word = &hptep->v;

	while (1) {
		if (!test_and_set_bit(HPTE_LOCK_BIT, word))
			break;
		while(test_bit(HPTE_LOCK_BIT, word))
			cpu_relax();
	}
}

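/* Release the per-HPTE lock; lwsync orders prior HPTE updates before the release. */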
static inline void native_unlock_hpte(hpte_t *hptep)
{
	unsigned long *word = &hptep->v;

	asm volatile("lwsync":::"memory");
	clear_bit(HPTE_LOCK_BIT, word);
}

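/*
 * Insert an entry into the given hash group: find an invalid slot,
 * claim it under the HPTE lock, then write the second doubleword
 * before setting the valid bit in the first.  Returns the slot index
 * within the group (bit 3 set for the secondary hash) or -1 if the
 * group is full.
 */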
long native_hpte_insert(unsigned long hpte_group, unsigned long va,
			unsigned long pa, unsigned long rflags,
			unsigned long vflags, int psize)
{
	hpte_t *hptep = htab_address + hpte_group;
	unsigned long hpte_v, hpte_r;
	int i;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW("    insert(group=%lx, va=%016lx, pa=%016lx,"
			" rflags=%lx, vflags=%lx, psize=%d)\n",
			hpte_group, va, pa, rflags, vflags, psize);
	}

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (! (hptep->v & HPTE_V_VALID)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			if (! (hptep->v & HPTE_V_VALID))
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize) | rflags;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}

	hptep->r = hpte_r;
	/* Guarantee the second dword is visible before the valid bit */
	__asm__ __volatile__ ("eieio" : : : "memory");
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
	hptep->v = hpte_v;

	__asm__ __volatile__ ("ptesync" : : : "memory");

	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

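/*
 * Make room in a full hash group: starting from a pseudo-random slot
 * (low bits of the timebase), invalidate the first valid, non-bolted
 * entry found.  Returns the slot index or -1 if no entry can be
 * evicted.
 */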
static long native_hpte_remove(unsigned long hpte_group)
{
	hpte_t *hptep;
	int i;
	int slot_offset;
	unsigned long hpte_v;

	DBG_LOW("    remove(group=%lx)\n", hpte_group);

	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
		hpte_v = hptep->v;

		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			hpte_v = hptep->v;
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Invalidate the hpte. NOTE: this also unlocks it */
	hptep->v = 0;

	return i;
}

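/*
 * Update the protection bits of an existing mapping.  Returns -1 if
 * the slot no longer holds the expected HPTE; the stale translation
 * is flushed from the TLB either way.
 */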
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
				 unsigned long va, int psize, int local)
{
	hpte_t *hptep = htab_address + slot;
	unsigned long hpte_v, want_v;
	int ret = 0;

	want_v = hpte_encode_v(va, psize);

	DBG_LOW("    update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
		va, want_v & HPTE_V_AVPN, slot, newpp);

	native_lock_hpte(hptep);

	hpte_v = hptep->v;

	/* Even if we miss, we need to invalidate the TLB */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
		DBG_LOW(" -> miss\n");
		native_unlock_hpte(hptep);
		ret = -1;
	} else {
		DBG_LOW(" -> hit\n");
		/* Update the HPTE */
		hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
		native_unlock_hpte(hptep);
	}

	/* Ensure it is out of the tlb too. */
	tlbie(va, psize, local);

	return ret;
}

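/*
 * Look up the slot for va/psize, searching the primary and then the
 * secondary hash group.  Returns the slot number (negated for a
 * secondary-hash match) or -1 if no matching HPTE exists.
 */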
static long native_hpte_find(unsigned long va, int psize)
{
	hpte_t *hptep;
	unsigned long hash;
	unsigned long i, j;
	long slot;
	unsigned long want_v, hpte_v;

	hash = hpt_hash(va, mmu_psize_defs[psize].shift);
	want_v = hpte_encode_v(va, psize);

	for (j = 0; j < 2; j++) {
		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		for (i = 0; i < HPTES_PER_GROUP; i++) {
			hptep = htab_address + slot;
			hpte_v = hptep->v;

			if (HPTE_V_COMPARE(hpte_v, want_v)
			    && (hpte_v & HPTE_V_VALID)
			    && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
				/* HPTE matches */
				if (j)
					slot = -slot;
				return slot;
			}
			++slot;
		}
		hash = ~hash;
	}

	return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
				       int psize)
{
	unsigned long vsid, va;
	long slot;
	hpte_t *hptep;

	vsid = get_kernel_vsid(ea);
	va = (vsid << 28) | (ea & 0x0fffffff);

	slot = native_hpte_find(va, psize);
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;

	/* Update the HPTE */
	hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
		(newpp & (HPTE_R_PP | HPTE_R_N));

	/* Ensure it is out of the tlb too. */
	tlbie(va, psize, 0);
}

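/*
 * Invalidate a single HPTE: clear its valid doubleword if it still
 * matches va/psize, then flush the translation from the TLB.  Runs
 * with interrupts disabled.
 */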
static void native_hpte_invalidate(unsigned long slot, unsigned long va,
				   int psize, int local)
{
	hpte_t *hptep = htab_address + slot;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;

	local_irq_save(flags);

	DBG_LOW("    invalidate(va=%016lx, hash: %x)\n", va, slot);

	want_v = hpte_encode_v(va, psize);
	native_lock_hpte(hptep);
	hpte_v = hptep->v;

	/* Even if we miss, we need to invalidate the TLB */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
		native_unlock_hpte(hptep);
	else
		/* Invalidate the hpte. NOTE: this also unlocks it */
		hptep->v = 0;

	/* Invalidate the TLB */
	tlbie(va, psize, local);

	local_irq_restore(flags);
}

/*
 * XXX This needs fixing based on page size. It's only used by
 * native_hpte_clear() for now which needs fixing too so they
 * make a good pair...
 */
static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
{
	unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
	unsigned long va;

	va = avpn << 23;

	if (! (hpte_v & HPTE_V_LARGE)) {
		unsigned long vpi, pteg;

		pteg = slot / HPTES_PER_GROUP;
		if (hpte_v & HPTE_V_SECONDARY)
			pteg = ~pteg;

		vpi = ((va >> 28) ^ pteg) & htab_hash_mask;

		va |= vpi << PAGE_SHIFT;
	}

	return va;
}

/*
 * clear all mappings on kexec.  All cpus are in real mode (or they will
 * be when they isi), and we are the only one left.  We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * TODO: add batching support when enabled.  remember, no dynamic memory here,
 * although there is the control page available...
 *
 * XXX FIXME: 4k only for now !
 */
static void native_hpte_clear(void)
{
	unsigned long slot, slots, flags;
	hpte_t *hptep = htab_address;
	unsigned long hpte_v;
	unsigned long pteg_count;

	pteg_count = htab_hash_mask + 1;

	local_irq_save(flags);

	/* we take the tlbie lock and hold it.  Some hardware will
	 * deadlock if we try to tlbie from two processors at once.
	 */
	spin_lock(&native_tlbie_lock);

	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running,  right?  and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
		hpte_v = hptep->v;

		/*
		 * Call __tlbie() here rather than tlbie() since we
		 * already hold the native_tlbie_lock.
		 */
		if (hpte_v & HPTE_V_VALID) {
			hptep->v = 0;
			__tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K);
		}
	}

	asm volatile("eieio; tlbsync; ptesync":::"memory");
	spin_unlock(&native_tlbie_lock);
	local_irq_restore(flags);
}

/*
 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
 * the lock all the time
 */
static void native_flush_hash_range(unsigned long number, int local)
{
	unsigned long va, hash, index, hidx, shift, slot;
	hpte_t *hptep;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
	unsigned long psize = batch->psize;
	int i;

	local_irq_save(flags);

	for (i = 0; i < number; i++) {
		va = batch->vaddr[i];
		pte = batch->pte[i];

		pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
			hash = hpt_hash(va, shift);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
			want_v = hpte_encode_v(va, psize);
			native_lock_hpte(hptep);
			hpte_v = hptep->v;
			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
			    !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else
				hptep->v = 0;
		} pte_iterate_hashed_end();
	}

	if (cpu_has_feature(CPU_FTR_TLBIEL) &&
	    mmu_psize_defs[psize].tlbiel && local) {
		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			va = batch->vaddr[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize, va, index,
						    shift) {
				__tlbiel(va, psize);
			} pte_iterate_hashed_end();
		}
		asm volatile("ptesync":::"memory");
	} else {
		int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);

		if (lock_tlbie)
			spin_lock(&native_tlbie_lock);

		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			va = batch->vaddr[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize, va, index,
						    shift) {
				__tlbie(va, psize);
			} pte_iterate_hashed_end();
		}
		asm volatile("eieio; tlbsync; ptesync":::"memory");

		if (lock_tlbie)
			spin_unlock(&native_tlbie_lock);
	}

	local_irq_restore(flags);
}

#ifdef CONFIG_PPC_PSERIES
/* Disable TLB batching on nighthawk */
static inline int tlb_batching_enabled(void)
{
	struct device_node *root = of_find_node_by_path("/");
	int enabled = 1;

	if (root) {
		const char *model = get_property(root, "model", NULL);
		if (model && !strcmp(model, "IBM,9076-N81"))
			enabled = 0;
		of_node_put(root);
	}

	return enabled;
}
#else
static inline int tlb_batching_enabled(void)
{
	return 1;
}
#endif

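/*
 * Wire the native (bare-metal) hash table operations into ppc_md.
 * Batched hash flushing is only hooked up where
 * tlb_batching_enabled() allows it.
 */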
void __init hpte_init_native(void)
{
	ppc_md.hpte_invalidate	= native_hpte_invalidate;
	ppc_md.hpte_updatepp	= native_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
	ppc_md.hpte_insert	= native_hpte_insert;
	ppc_md.hpte_remove	= native_hpte_remove;
	ppc_md.hpte_clear_all	= native_hpte_clear;
	if (tlb_batching_enabled())
		ppc_md.flush_hash_range = native_flush_hash_range;
}