/*
 * PowerPC64 SLB support.
 *
 * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
 * Based on earlier code written by:
 * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
 *    Copyright (c) 2001 Dave Engebretsen
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <asm/asm-prototypes.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/smp.h>
#include <linux/compiler.h>
#include <linux/context_tracking.h>
#include <linux/mm_types.h>

#include <asm/udbg.h>
#include <asm/code-patching.h>

enum slb_index {
	LINEAR_INDEX	= 0, /* Kernel linear map  (0xc000000000000000) */
	KSTACK_INDEX	= 1, /* Kernel stack map */
};

static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);

#define slb_esid_mask(ssize)	\
	(((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)

static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
					 enum slb_index index)
{
	return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index;
}

static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize,
					 unsigned long flags)
{
	return (vsid << slb_vsid_shift(ssize)) | flags |
		((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
}

static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
					 unsigned long flags)
{
	return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);
}

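/*
 * Debug check (CONFIG_DEBUG_VM only) that an SLB entry for @ea is, or is
 * not, currently present, probed with the slbfee. instruction. Expects to
 * be called with interrupts hard-disabled.
 */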
static void assert_slb_presence(bool present, unsigned long ea)
{
#ifdef CONFIG_DEBUG_VM
	unsigned long tmp;

	WARN_ON_ONCE(mfmsr() & MSR_EE);

	if (!cpu_has_feature(CPU_FTR_ARCH_206))
		return;

	/*
	 * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware
	 * ignores all other bits from 0-27, so just clear them all.
	 */
	ea &= ~((1UL << 28) - 1);
	asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");

	WARN_ON(present == (tmp == 0));
#endif
}

static inline void slb_shadow_update(unsigned long ea, int ssize,
				     unsigned long flags,
				     enum slb_index index)
{
	struct slb_shadow *p = get_slb_shadow();

	/*
	 * Clear the ESID first so the entry is not valid while we are
	 * updating it.  No write barriers are needed here, provided
	 * we only update the current CPU's SLB shadow buffer.
	 */
	WRITE_ONCE(p->save_area[index].esid, 0);
	WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
	WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
}

static inline void slb_shadow_clear(enum slb_index index)
{
	WRITE_ONCE(get_slb_shadow()->save_area[index].esid, cpu_to_be64(index));
}

static inline void create_shadowed_slbe(unsigned long ea, int ssize,
					unsigned long flags,
					enum slb_index index)
{
	/*
	 * Updating the shadow buffer before writing the SLB ensures
	 * we don't get a stale entry here if we get preempted by PHYP
	 * between these two statements.
	 */
	slb_shadow_update(ea, ssize, flags, index);

	assert_slb_presence(false, ea);
	asm volatile("slbmte  %0,%1" :
		     : "r" (mk_vsid_data(ea, ssize, flags)),
		       "r" (mk_esid_data(ea, ssize, index))
		     : "memory" );
}

/*
 * Insert bolted entries into SLB (which may not be empty, so don't clear
 * slb_cache_ptr).
 */
void __slb_restore_bolted_realmode(void)
{
	struct slb_shadow *p = get_slb_shadow();
	enum slb_index index;

	 /* No isync needed because realmode. */
	for (index = 0; index < SLB_NUM_BOLTED; index++) {
		asm volatile("slbmte  %0,%1" :
		     : "r" (be64_to_cpu(p->save_area[index].vsid)),
		       "r" (be64_to_cpu(p->save_area[index].esid)));
	}

	assert_slb_presence(true, local_paca->kstack);
}

/*
 * Insert the bolted entries into an empty SLB.
 */
void slb_restore_bolted_realmode(void)
{
	__slb_restore_bolted_realmode();
	get_paca()->slb_cache_ptr = 0;
148 149 150

	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
}

/*
 * This flushes all SLB entries including 0, so it must be realmode.
 */
void slb_flush_all_realmode(void)
{
	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
}

/*
 * This flushes non-bolted entries; it can be run in virtual mode. Must
 * be called with interrupts disabled.
 */
void slb_flush_and_restore_bolted(void)
{
	struct slb_shadow *p = get_slb_shadow();

	BUILD_BUG_ON(SLB_NUM_BOLTED != 2);

	WARN_ON(!irqs_disabled());

	/*
	 * We can't take a PMU exception in the following code, so hard
	 * disable interrupts.
	 */
	hard_irq_disable();

	asm volatile("isync\n"
		     "slbia\n"
		     "slbmte  %0, %1\n"
		     "isync\n"
		     :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)),
			"r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid))
		     : "memory");
	assert_slb_presence(true, get_paca()->kstack);

	get_paca()->slb_cache_ptr = 0;

	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
}

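/*
 * Save a copy of the current SLB and slb_cache_ptr so they can be reported
 * later by slb_dump_contents() when an error is being logged.
 */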
void slb_save_contents(struct slb_entry *slb_ptr)
{
	int i;
	unsigned long e, v;

	/* Save slb_cache_ptr value. */
	get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;

	if (!slb_ptr)
		return;

	for (i = 0; i < mmu_slb_size; i++) {
		asm volatile("slbmfee  %0,%1" : "=r" (e) : "r" (i));
		asm volatile("slbmfev  %0,%1" : "=r" (v) : "r" (i));
		slb_ptr->esid = e;
		slb_ptr->vsid = v;
		slb_ptr++;
	}
}

void slb_dump_contents(struct slb_entry *slb_ptr)
{
	int i, n;
	unsigned long e, v;
	unsigned long llp;

	if (!slb_ptr)
		return;

	pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
	pr_err("Last SLB entry inserted at slot %d\n", get_paca()->stab_rr);

	for (i = 0; i < mmu_slb_size; i++) {
		e = slb_ptr->esid;
		v = slb_ptr->vsid;
		slb_ptr++;

		if (!e && !v)
			continue;

		pr_err("%02d %016lx %016lx\n", i, e, v);

		if (!(e & SLB_ESID_V)) {
			pr_err("\n");
			continue;
		}
		llp = v & SLB_VSID_LLP;
		if (v & SLB_VSID_B_1T) {
			pr_err("  1T  ESID=%9lx  VSID=%13lx LLP:%3lx\n",
			       GET_ESID_1T(e),
			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
		} else {
			pr_err(" 256M ESID=%9lx  VSID=%13lx LLP:%3lx\n",
			       GET_ESID(e),
			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
		}
	}
	pr_err("----------------------------------\n");

	/* Dump slb cache entries as well. */
	pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
	pr_err("Valid SLB cache entries:\n");
	n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);
	for (i = 0; i < n; i++)
		pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
	pr_err("Rest of SLB cache entries:\n");
	for (i = n; i < SLB_CACHE_ENTRIES; i++)
		pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
}

void slb_vmalloc_update(void)
{
	/*
	 * vmalloc is not bolted, so we just have to flush the non-bolted entries.
	 */
	slb_flush_and_restore_bolted();
}

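/*
 * The thread keeps a small FIFO (SLB_PRELOAD_NR entries) of user ESIDs that
 * were recently faulted in or preloaded; switch_slb() replays it so a task
 * resumes with its recently used segments already present in the SLB.
 */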
static bool preload_hit(struct thread_info *ti, unsigned long esid)
{
	unsigned char i;

	for (i = 0; i < ti->slb_preload_nr; i++) {
		unsigned char idx;

		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
		if (esid == ti->slb_preload_esid[idx])
			return true;
	}
	return false;
}

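/* Record @ea's segment in the preload FIFO; returns true if it was new. */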
static bool preload_add(struct thread_info *ti, unsigned long ea)
{
	unsigned char idx;
	unsigned long esid;

	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
		/* EAs are stored >> 28 so 256MB segments don't need clearing */
		if (ea & ESID_MASK_1T)
			ea &= ESID_MASK_1T;
	}

	esid = ea >> SID_SHIFT;

	if (preload_hit(ti, esid))
		return false;

	idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
	ti->slb_preload_esid[idx] = esid;
	if (ti->slb_preload_nr == SLB_PRELOAD_NR)
		ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
	else
		ti->slb_preload_nr++;

	return true;
}

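/* Drop the oldest entry from the preload FIFO (see switch_slb()). */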
static void preload_age(struct thread_info *ti)
{
	if (!ti->slb_preload_nr)
		return;
	ti->slb_preload_nr--;
	ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
}

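/*
 * Called on exec to seed the preload FIFO with segments the new image is
 * likely to use: its text at 0x10000000 and the mmap base.
 */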
void slb_setup_new_exec(void)
{
	struct thread_info *ti = current_thread_info();
	struct mm_struct *mm = current->mm;
	unsigned long exec = 0x10000000;

	WARN_ON(irqs_disabled());

	/*
	 * The preload cache can only be used to determine whether an SLB
	 * entry exists if it has not started to overflow.
	 */
	if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
		return;

	hard_irq_disable();

	/*
	 * We have no good place to clear the slb preload cache on exec;
	 * flush_thread is about the earliest arch hook, but that happens
	 * after we switch to the mm and have already preloaded the SLBEs.
	 *
	 * For the most part that's probably okay to use entries from the
	 * previous exec, they will age out if unused. It may turn out to
	 * be an advantage to clear the cache before switching to it,
	 * however.
	 */

	/*
	 * Preload some userspace segments into the SLB.
	 * Almost all 32-bit and 64-bit PowerPC executables are linked at
	 * 0x10000000 so it makes sense to preload this segment.
	 */
	if (!is_kernel_addr(exec)) {
		if (preload_add(ti, exec))
			slb_allocate_user(mm, exec);
	}

	/* Libraries and mmaps. */
	if (!is_kernel_addr(mm->mmap_base)) {
		if (preload_add(ti, mm->mmap_base))
			slb_allocate_user(mm, mm->mmap_base);
	}

	/* see switch_slb */
	asm volatile("isync" : : : "memory");

	local_irq_enable();
}

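/*
 * Preload segments for a freshly exec'd image: the entry point, the initial
 * stack and the bottom of the heap.
 */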
void preload_new_slb_context(unsigned long start, unsigned long sp)
{
	struct thread_info *ti = current_thread_info();
	struct mm_struct *mm = current->mm;
	unsigned long heap = mm->start_brk;

	WARN_ON(irqs_disabled());

	/* see above */
	if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
		return;

	hard_irq_disable();

	/* Userspace entry address. */
	if (!is_kernel_addr(start)) {
		if (preload_add(ti, start))
			slb_allocate_user(mm, start);
	}

	/* Top of stack, grows down. */
	if (!is_kernel_addr(sp)) {
		if (preload_add(ti, sp))
			slb_allocate_user(mm, sp);
	}

	/* Bottom of heap, grows up. */
	if (heap && !is_kernel_addr(heap)) {
		if (preload_add(ti, heap))
			slb_allocate_user(mm, heap);
	}

	/* see switch_slb */
	asm volatile("isync" : : : "memory");

	local_irq_enable();
}


/* Flush all user entries from the segment table of the current processor. */
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
{
	struct thread_info *ti = task_thread_info(tsk);
	unsigned char i;

	/*
	 * We need interrupts hard-disabled here, not just soft-disabled,
	 * so that a PMU interrupt can't occur, which might try to access
	 * user memory (to get a stack trace) and possibly cause an SLB miss
	 * which would update the slb_cache/slb_cache_ptr fields in the PACA.
	 */
	hard_irq_disable();
	asm volatile("isync" : : : "memory");
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		/*
		 * SLBIA IH=3 invalidates all Class=1 SLBEs and their
		 * associated lookaside structures, which matches what
		 * switch_slb wants. So ARCH_300 does not use the slb
		 * cache.
		 */
		asm volatile(PPC_SLBIA(3));
	} else {
		unsigned long offset = get_paca()->slb_cache_ptr;

		if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
		    offset <= SLB_CACHE_ENTRIES) {
			unsigned long slbie_data = 0;

			for (i = 0; i < offset; i++) {
				unsigned long ea;

				ea = (unsigned long)
					get_paca()->slb_cache[i] << SID_SHIFT;
				/*
				 * Could assert_slb_presence(true) here, but
				 * hypervisor or machine check could have come
				 * in and removed the entry at this point.
				 */

				slbie_data = ea;
				slbie_data |= user_segment_size(slbie_data)
						<< SLBIE_SSIZE_SHIFT;
				slbie_data |= SLBIE_C; /* user slbs have C=1 */
				asm volatile("slbie %0" : : "r" (slbie_data));
			}

			/* Workaround POWER5 < DD2.1 issue */
			if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
				asm volatile("slbie %0" : : "r" (slbie_data));

		} else {
			struct slb_shadow *p = get_slb_shadow();
			unsigned long ksp_esid_data =
				be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
			unsigned long ksp_vsid_data =
				be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);

			asm volatile(PPC_SLBIA(1) "\n"
				     "slbmte	%0,%1\n"
				     "isync"
				     :: "r"(ksp_vsid_data),
					"r"(ksp_esid_data));

			get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
		}

		get_paca()->slb_cache_ptr = 0;
	}
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;

	copy_mm_to_paca(mm);

	/*
	 * We gradually age out SLBs after a number of context switches to
	 * reduce reload overhead of unused entries (like we do with FP/VEC
	 * reload). Each time we wrap 256 switches, take an entry out of the
	 * SLB preload cache.
	 */
	tsk->thread.load_slb++;
	if (!tsk->thread.load_slb) {
		unsigned long pc = KSTK_EIP(tsk);

		preload_age(ti);
		preload_add(ti, pc);
	}

	for (i = 0; i < ti->slb_preload_nr; i++) {
		unsigned char idx;
		unsigned long ea;

		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
		ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;

		slb_allocate_user(mm, ea);
	}

	/*
	 * Synchronize slbmte preloads with possible subsequent user memory
	 * address accesses by the kernel (user mode won't happen until
	 * rfid, which is safe).
	 */
	asm volatile("isync" : : : "memory");
}

void slb_set_size(u16 size)
{
	mmu_slb_size = size;
}

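/*
 * Per-CPU SLB setup at boot: record the segment size/page size encodings,
 * wipe the SLB, and bolt the kernel linear mapping (and, on secondary CPUs,
 * the kernel stack) back in.
 */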
void slb_initialize(void)
{
	unsigned long linear_llp, vmalloc_llp, io_llp;
	unsigned long lflags;
	static int slb_encoding_inited;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	unsigned long vmemmap_llp;
#endif

	/* Prepare our SLB miss handler based on our page size */
	linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
	io_llp = mmu_psize_defs[mmu_io_psize].sllp;
	vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
	get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
#endif
	if (!slb_encoding_inited) {
		slb_encoding_inited = 1;
		pr_devel("SLB: linear  LLP = %04lx\n", linear_llp);
		pr_devel("SLB: io      LLP = %04lx\n", io_llp);
#ifdef CONFIG_SPARSEMEM_VMEMMAP
		pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
#endif
	}

	get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;

	lflags = SLB_VSID_KERNEL | linear_llp;

	/* Invalidate the entire SLB (even entry 0) & all the ERATS */
	asm volatile("isync":::"memory");
	asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
	asm volatile("isync; slbia; isync":::"memory");
	create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);

	/* For the boot cpu, we're running on the stack in init_thread_union,
	 * which is in the first segment of the linear mapping, and also
	 * get_paca()->kstack hasn't been initialized yet.
	 * For secondary cpus, we need to bolt the kernel stack entry now.
	 */
	slb_shadow_clear(KSTACK_INDEX);
	if (raw_smp_processor_id() != boot_cpuid &&
	    (get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
		create_shadowed_slbe(get_paca()->kstack,
				     mmu_kernel_ssize, lflags, KSTACK_INDEX);

	asm volatile("isync":::"memory");
}

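/*
 * Record a user ESID in the paca's slb_cache so switch_slb() can invalidate
 * it with targeted slbie instructions. If the cache overflows, mark it
 * invalid so switch_slb() falls back to flushing with SLBIA.
 */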
static void slb_cache_update(unsigned long esid_data)
{
	int slb_cache_index;

	if (cpu_has_feature(CPU_FTR_ARCH_300))
		return; /* ISAv3.0B and later does not use slb_cache */

	/*
	 * Now update slb cache entries
	 */
	slb_cache_index = local_paca->slb_cache_ptr;
	if (slb_cache_index < SLB_CACHE_ENTRIES) {
		/*
		 * We have space in slb cache for optimized switch_slb().
		 * Top 36 bits from esid_data as per ISA
		 */
		local_paca->slb_cache[slb_cache_index++] = esid_data >> 28;
		local_paca->slb_cache_ptr++;
	} else {
		/*
		 * Our cache is full and the current cache content strictly
		 * doesn't indicate the active SLB contents. Bump the ptr
		 * so that switch_slb() will ignore the cache.
		 */
		local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
	}
}

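/*
 * Pick an SLB slot for a new entry: use a free slot from slb_used_bitmap if
 * one exists, otherwise evict round-robin starting at SLB_NUM_BOLTED.
 */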
static enum slb_index alloc_slb_index(bool kernel)
{
	enum slb_index index;

	/*
	 * The allocation bitmaps can become out of sync with the SLB
	 * when the _switch code does slbie when bolting a new stack
	 * segment and it must not be anywhere else in the SLB. This leaves
	 * a kernel allocated entry that is unused in the SLB. With very
	 * large systems or small segment sizes, the bitmaps could slowly
	 * fill with these entries. They will eventually be cleared out
	 * by the round robin allocator in that case, so it's probably not
	 * worth accounting for.
	 */

	/*
	 * SLBs beyond 32 entries are allocated with stab_rr only.
	 * POWER7/8/9 have 32 SLB entries; this could be expanded if a
	 * future CPU has more.
	 */
	if (local_paca->slb_used_bitmap != U32_MAX) {
		index = ffz(local_paca->slb_used_bitmap);
		local_paca->slb_used_bitmap |= 1U << index;
		if (kernel)
			local_paca->slb_kern_bitmap |= 1U << index;
	} else {
		/* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
		index = local_paca->stab_rr;
		if (index < (mmu_slb_size - 1))
			index++;
		else
			index = SLB_NUM_BOLTED;
		local_paca->stab_rr = index;
		if (index < 32) {
			if (kernel)
				local_paca->slb_kern_bitmap |= 1U << index;
			else
				local_paca->slb_kern_bitmap &= ~(1U << index);
		}
	}
	BUG_ON(index < SLB_NUM_BOLTED);

	return index;
}

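/*
 * Build the VSID/ESID pair for @ea and write it into a newly allocated SLB
 * slot with slbmte. Returns -EFAULT if no VSID can be formed.
 */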
static long slb_insert_entry(unsigned long ea, unsigned long context,
				unsigned long flags, int ssize, bool kernel)
{
	unsigned long vsid;
	unsigned long vsid_data, esid_data;
	enum slb_index index;

	vsid = get_vsid(context, ea, ssize);
	if (!vsid)
		return -EFAULT;

	/*
	 * There must not be a kernel SLB fault in alloc_slb_index or before
	 * slbmte here or the allocation bitmaps could get out of whack with
	 * the SLB.
	 *
	 * User SLB faults or preloads take this path which might get inlined
	 * into the caller, so add compiler barriers here to ensure unsafe
	 * memory accesses do not come between.
	 */
	barrier();

	index = alloc_slb_index(kernel);

	vsid_data = __mk_vsid_data(vsid, ssize, flags);
	esid_data = mk_esid_data(ea, ssize, index);

	/*
	 * No need for an isync before or after this slbmte. The exception
	 * we enter with and the rfid we exit with are context synchronizing.
	 * User preloads should add isync afterwards in case the kernel
	 * accesses user memory before it returns to userspace with rfid.
	 */
	assert_slb_presence(false, ea);
	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));

	barrier();

	if (!kernel)
		slb_cache_update(esid_data);

	return 0;
}

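/*
 * Insert an SLB entry for a kernel address: the linear mapping, vmemmap, or
 * the vmalloc/ioremap region, each with its own page size encoding.
 */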
static long slb_allocate_kernel(unsigned long ea, unsigned long id)
{
	unsigned long context;
	unsigned long flags;
	int ssize;

	if (id == KERNEL_REGION_ID) {

		/* We only support up to MAX_PHYSMEM_BITS */
		if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS))
			return -EFAULT;

		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;

#ifdef CONFIG_SPARSEMEM_VMEMMAP
	} else if (id == VMEMMAP_REGION_ID) {

		if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
			return -EFAULT;

		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
#endif
	} else if (id == VMALLOC_REGION_ID) {

		if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
			return -EFAULT;

		if (ea < H_VMALLOC_END)
			flags = local_paca->vmalloc_sllp;
		else
			flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
	} else {
		return -EFAULT;
	}

	ssize = MMU_SEGSIZE_1T;
	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
		ssize = MMU_SEGSIZE_256M;

	context = get_kernel_context(ea);
	return slb_insert_entry(ea, context, flags, ssize, true);
}

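/*
 * Insert an SLB entry for a user address, using the mm's context and the
 * page size of the slice containing @ea.
 */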
static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
{
	unsigned long context;
	unsigned long flags;
	int bpsize;
	int ssize;

	/*
	 * Consider this a bad access if we take an SLB miss
	 * on an address above the addr limit.
	 */
	if (ea >= mm_ctx_slb_addr_limit(&mm->context))
		return -EFAULT;

	context = get_user_context(&mm->context, ea);
	if (!context)
		return -EFAULT;

	if (unlikely(ea >= H_PGTABLE_RANGE)) {
		WARN_ON(1);
		return -EFAULT;
	}

	ssize = user_segment_size(ea);

	bpsize = get_slice_psize(mm, ea);
	flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;

	return slb_insert_entry(ea, context, flags, ssize, false);
}

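/*
 * First-level handler for data and instruction SLB miss interrupts, entered
 * with IRQs not reconciled; see the comment below about what it may touch.
 */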
long do_slb_fault(struct pt_regs *regs, unsigned long ea)
{
	unsigned long id = REGION_ID(ea);

	/* IRQs are not reconciled here, so can't check irqs_disabled */
	VM_WARN_ON(mfmsr() & MSR_EE);

	if (unlikely(!(regs->msr & MSR_RI)))
		return -EINVAL;

	/*
	 * SLB kernel faults must be very careful not to touch anything
	 * that is not bolted. E.g., PACA and global variables are okay,
	 * mm->context stuff is not.
	 *
	 * SLB user faults can access all of kernel memory, but must be
	 * careful not to touch things like IRQ state because it is not
	 * "reconciled" here. The difficulty is that we must use
	 * fast_exception_return to return from kernel SLB faults without
	 * looking at possible non-bolted memory. We could test user vs
	 * kernel faults in the interrupt handler asm and do a full fault,
	 * reconcile, ret_from_except for user faults which would make them
	 * first class kernel code. But for performance it's probably nicer
	 * if they go via fast_exception_return too.
	 */
	if (id >= KERNEL_REGION_ID) {
		long err;
#ifdef CONFIG_DEBUG_VM
		/* Catch recursive kernel SLB faults. */
		BUG_ON(local_paca->in_kernel_slb_handler);
		local_paca->in_kernel_slb_handler = 1;
#endif
		err = slb_allocate_kernel(ea, id);
#ifdef CONFIG_DEBUG_VM
		local_paca->in_kernel_slb_handler = 0;
#endif
		return err;
	} else {
		struct mm_struct *mm = current->mm;
		long err;

		if (unlikely(!mm))
			return -EFAULT;

		err = slb_allocate_user(mm, ea);
		if (!err)
			preload_add(current_thread_info(), ea);

		return err;
	}
}

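/*
 * Called when do_slb_fault() fails: deliver SIGSEGV for bad user addresses,
 * report a kernel bad page fault, or treat a miss taken with MSR[RI] clear
 * as unrecoverable.
 */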
void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err)
{
	if (err == -EFAULT) {
		if (user_mode(regs))
			_exception(SIGSEGV, regs, SEGV_BNDERR, ea);
		else
			bad_page_fault(regs, ea, SIGSEGV);
	} else if (err == -EINVAL) {
		unrecoverable_exception(regs);
	} else {
		BUG();
	}
}