/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <asm/system.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tsb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

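/* Hash a virtual address into a TSB bucket index.  The TSB always has
 * a power-of-two number of entries, so once the page offset bits are
 * shifted out we can simply mask with (nentries - 1).
 */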
static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
	vaddr >>= hash_shift;
	return vaddr & (nentries - 1);
}

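/* A TSB entry's tag holds (vaddr >> 22) of the address it maps, so a
 * straight equality test tells us whether the entry covers vaddr.
 */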
static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
	return (tag == (vaddr >> 22));
}

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

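/* Invalidate any kernel TSB entries covering [start, end) by writing
 * the invalid bit into their tags.
 */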
void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long v;

	for (v = start; v < end; v += PAGE_SIZE) {
		unsigned long hash = tsb_hash(v, PAGE_SHIFT,
					      KERNEL_TSB_NENTRIES);
		struct tsb *ent = &swapper_tsb[hash];

		if (tag_compare(ent->tag, v))
			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
	}
}

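/* Flush each pending address in the mmu_gather out of one TSB.  "tsb"
 * is the TSB base (a physical address on cheetah_plus/hypervisor, a
 * virtual address otherwise) and "hash_shift" selects the page size
 * the TSB is indexed by.  The low bit of each recorded address is a
 * flag bit and is masked off before hashing.
 */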
static void __flush_tsb_one(struct mmu_gather *mp, unsigned long hash_shift, unsigned long tsb, unsigned long nentries)
{
	unsigned long i;

	for (i = 0; i < mp->tlb_nr; i++) {
		unsigned long v = mp->vaddrs[i];
		unsigned long tag, ent, hash;

		v &= ~0x1UL;

		hash = tsb_hash(v, hash_shift, nentries);
		ent = tsb + (hash * sizeof(struct tsb));
		tag = (v >> 22UL);

		tsb_flush(ent, tag);
	}
}

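/* Flush the pending userspace addresses out of every TSB this address
 * space owns: the base page size TSB and, when configured, the huge
 * page TSB.
 */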
void flush_tsb_user(struct mmu_gather *mp)
{
	struct mm_struct *mm = mp->mm;
	unsigned long nentries, base, flags;

	spin_lock_irqsave(&mm->context.lock, flags);

	base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
	nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
		base = __pa(base);
	__flush_tsb_one(mp, PAGE_SHIFT, base, nentries);

#ifdef CONFIG_HUGETLB_PAGE
	if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
			base = __pa(base);
		__flush_tsb_one(mp, HPAGE_SHIFT, base, nentries);
	}
#endif
	spin_unlock_irqrestore(&mm->context.lock, flags);
}

#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_8K
#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_64K
#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_64K
#else
#error Broken base page size setting...
#endif

#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_64K
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_64K
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_512K
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_512K
#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_4MB
#else
#error Broken huge page size setting...
#endif
#endif

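/* Fill in everything the MMU context needs to use a newly allocated
 * TSB: the TSB register value, the locked kernel mapping used to reach
 * the TSB virtually on chips that cannot take a physical TSB address,
 * and the TSB descriptor used when tlb_type == hypervisor.
 */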
static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
	unsigned long tsb_reg, base, tsb_paddr;
	unsigned long page_sz, tte;

	mm->context.tsb_block[tsb_idx].tsb_nentries =
		tsb_bytes / sizeof(struct tsb);

	base = TSBMAP_BASE;
	tte = pgprot_val(PAGE_KERNEL_LOCKED);
	tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

	/* Use the smallest page size that can map the whole TSB
	 * in one TLB entry.
	 */
	switch (tsb_bytes) {
	case 8192 << 0:
		tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
		base += (tsb_paddr & 8192);
#endif
		page_sz = 8192;
		break;

	case 8192 << 1:
		tsb_reg = 0x1UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 2:
		tsb_reg = 0x2UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 3:
		tsb_reg = 0x3UL;
		page_sz = 64 * 1024;
		break;

	case 8192 << 4:
		tsb_reg = 0x4UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 5:
		tsb_reg = 0x5UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 6:
		tsb_reg = 0x6UL;
		page_sz = 512 * 1024;
		break;

	case 8192 << 7:
		tsb_reg = 0x7UL;
		page_sz = 4 * 1024 * 1024;
		break;

	default:
		printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
		       current->comm, current->pid, tsb_bytes);
		do_exit(SIGSEGV);
	}
	tte |= pte_sz_bits(page_sz);

	if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
		/* Physical mapping, no locked TLB entry for TSB.  */
		tsb_reg |= tsb_paddr;

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
	} else {
		tsb_reg |= base;
		tsb_reg |= (tsb_paddr & (page_sz - 1UL));
		tte |= (tsb_paddr & ~(page_sz - 1UL));

		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
		mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
	}

	/* Setup the Hypervisor TSB descriptor.  */
	if (tlb_type == hypervisor) {
		struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_idx = HV_PGSZ_IDX_BASE;
			break;
#ifdef CONFIG_HUGETLB_PAGE
		case MM_TSB_HUGE:
			hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->assoc = 1;
		hp->num_ttes = tsb_bytes / 16;
		hp->ctx_idx = 0;
		switch (tsb_idx) {
		case MM_TSB_BASE:
			hp->pgsz_mask = HV_PGSZ_MASK_BASE;
			break;
#ifdef CONFIG_HUGETLB_PAGE
		case MM_TSB_HUGE:
			hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
			break;
#endif
		default:
			BUG();
		}
		hp->tsb_base = tsb_paddr;
		hp->resv = 0;
	}
}

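/* One kmem cache per supported TSB size, 8KB through 1MB in powers of
 * two.
 */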
static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
	"tsb_8KB",
	"tsb_16KB",
	"tsb_32KB",
	"tsb_64KB",
	"tsb_128KB",
	"tsb_256KB",
	"tsb_512KB",
	"tsb_1MB",
};

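/* Create the TSB kmem caches at boot.  Passing the object size as the
 * alignment too means every TSB comes back naturally aligned to its
 * own size, which the hardware requires (see the BUG_ON() in
 * setup_tsb_params()).
 */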
void __init pgtable_cache_init(void)
{
	unsigned long i;

	for (i = 0; i < 8; i++) {
		unsigned long size = 8192 << i;
		const char *name = tsb_cache_names[i];

		tsb_caches[i] = kmem_cache_create(name,
						  size, size,
						  0, NULL);
		if (!tsb_caches[i]) {
			prom_printf("Could not create %s cache\n", name);
			prom_halt();
		}
	}
}

int sysctl_tsb_ratio = -2;

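/* Convert a TSB size in bytes into the RSS value at which tsb_grow()
 * should next be triggered.  A negative sysctl_tsb_ratio leaves
 * headroom below the entry count, a positive one allows overcommit.
 * For example, with the default of -2 an 8KB TSB (512 16-byte entries)
 * yields 512 - (512 >> 2) = 384, the 3/4 trigger point mentioned in
 * the comment above tsb_grow().
 */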
static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
	unsigned long num_ents = (new_size / sizeof(struct tsb));

	if (sysctl_tsb_ratio < 0)
		return num_ents - (num_ents >> -sysctl_tsb_ratio);
	else
		return num_ents + (num_ents >> sysctl_tsb_ratio);
}

/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try to grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
 * must be 512K aligned.  It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
	unsigned long max_tsb_size = 1 * 1024 * 1024;
	unsigned long new_size, old_size, flags;
	struct tsb *old_tsb, *new_tsb;
	unsigned long new_cache_index, old_cache_index;
	unsigned long new_rss_limit;
	gfp_t gfp_flags;

	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
		max_tsb_size = (PAGE_SIZE << MAX_ORDER);

	new_cache_index = 0;
	for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
		new_rss_limit = tsb_size_to_rss_limit(new_size);
		if (new_rss_limit > rss)
			break;
		new_cache_index++;
	}

	if (new_size == max_tsb_size)
		new_rss_limit = ~0UL;

retry_tsb_alloc:
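	/* TSBs larger than two pages need a higher-order allocation;
	 * don't warn or retry hard if one fails, since the fallback
	 * below copes with it.
	 */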
	gfp_flags = GFP_KERNEL;
	if (new_size > (PAGE_SIZE * 2))
		gfp_flags = __GFP_NOWARN | __GFP_NORETRY;

	new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
					gfp_flags, numa_node_id());
	if (unlikely(!new_tsb)) {
		/* Not being able to fork due to a high-order TSB
		 * allocation failure is very bad behavior.  Just back
		 * down to a 0-order allocation and force no TSB
		 * growing for this address space.
		 */
		if (mm->context.tsb_block[tsb_index].tsb == NULL &&
		    new_cache_index > 0) {
			new_cache_index = 0;
			new_size = 8192;
			new_rss_limit = ~0UL;
			goto retry_tsb_alloc;
		}

		/* If we failed on a TSB grow, we are under serious
		 * memory pressure, so don't try to grow any more.
		 */
		if (mm->context.tsb_block[tsb_index].tsb != NULL)
			mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
		return;
	}

	/* Mark all tags as invalid.  */
	tsb_init(new_tsb, new_size);

	/* Ok, we are about to commit the changes.  If we are
	 * growing an existing TSB, the locking is very tricky,
	 * so WATCH OUT!
	 *
	 * We have to hold mm->context.lock while committing to the
	 * new TSB; this synchronizes us with processors in
	 * flush_tsb_user() and switch_mm() for this address space.
	 *
	 * But even with that lock held, processors run asynchronously
	 * accessing the old TSB via TLB miss handling.  This is OK
	 * because those actions are just propagating state from the
	 * Linux page tables into the TSB; page table mappings are not
	 * being changed.  If a real fault occurs, the processor will
	 * synchronize with us when it hits flush_tsb_user(); this is
	 * also true for the case where vmscan is modifying the page
	 * tables.  The only thing we need to be careful with is to
	 * skip any locked TSB entries during copy_tsb().
	 *
	 * When we finish committing to the new TSB, we have to drop
	 * the lock and ask all other cpus running this address space
	 * to run tsb_context_switch() to see the new TSB table.
	 */
	spin_lock_irqsave(&mm->context.lock, flags);

	old_tsb = mm->context.tsb_block[tsb_index].tsb;
	old_cache_index =
		(mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
	old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
		    sizeof(struct tsb));

	/* Handle multiple threads trying to grow the TSB at the same time.
	 * One will get in here first, and bump the size and the RSS limit.
	 * The others will get in here next and hit this check.
	 */
	if (unlikely(old_tsb &&
		     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
		spin_unlock_irqrestore(&mm->context.lock, flags);

		kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
		return;
	}

	mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

	if (old_tsb) {
		extern void copy_tsb(unsigned long old_tsb_base,
				     unsigned long old_tsb_size,
				     unsigned long new_tsb_base,
				     unsigned long new_tsb_size);
		unsigned long old_tsb_base = (unsigned long) old_tsb;
		unsigned long new_tsb_base = (unsigned long) new_tsb;

		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
			old_tsb_base = __pa(old_tsb_base);
			new_tsb_base = __pa(new_tsb_base);
		}
		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
	}

	mm->context.tsb_block[tsb_index].tsb = new_tsb;
	setup_tsb_params(mm, tsb_index, new_size);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	/* If old_tsb is NULL, we're being invoked for the first time
	 * from init_new_context().
	 */
	if (old_tsb) {
		/* Reload it on the local cpu.  */
		tsb_context_switch(mm);

		/* Now force other processors to do the same.  */
		preempt_disable();
		smp_tsb_sync(mm);
		preempt_enable();

		/* Now it is safe to free the old tsb.  */
		kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
	}
}

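/* Set up the MMU context of a new address space: clear the hardware
 * context number, zero the TSB pointers that copy_mm() duplicated from
 * the parent, and size the initial base TSB from the inherited RSS.
 */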
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
#ifdef CONFIG_HUGETLB_PAGE
	unsigned long huge_pte_count;
#endif
	unsigned int i;

	spin_lock_init(&mm->context.lock);

	mm->context.sparc64_ctx_val = 0UL;

#ifdef CONFIG_HUGETLB_PAGE
	/* We reset it to zero because the fork() page copying
	 * will re-increment the counters as the parent PTEs are
	 * copied into the child address space.
	 */
	huge_pte_count = mm->context.huge_pte_count;
	mm->context.huge_pte_count = 0;
#endif

	/* copy_mm() copies over the parent's mm_struct before calling
	 * us, so we need to zero out the TSB pointer or else tsb_grow()
	 * will be confused and think there is an older TSB to free up.
	 */
	for (i = 0; i < MM_NUM_TSBS; i++)
		mm->context.tsb_block[i].tsb = NULL;

	/* If this is fork, inherit the parent's TSB size.  We would
	 * grow it to that size on the first page fault anyway.
	 */
	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));

#ifdef CONFIG_HUGETLB_PAGE
	if (unlikely(huge_pte_count))
		tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif

	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
		return -ENOMEM;

	return 0;
}

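/* Return one TSB to its size-indexed kmem cache.  The low three bits
 * of tsb_reg_val record which cache the TSB was allocated from.
 */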
static void tsb_destroy_one(struct tsb_config *tp)
{
	unsigned long cache_index;

	if (!tp->tsb)
		return;
	cache_index = tp->tsb_reg_val & 0x7UL;
	kmem_cache_free(tsb_caches[cache_index], tp->tsb);
	tp->tsb = NULL;
	tp->tsb_reg_val = 0UL;
}

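/* Tear down an exiting address space: free all of its TSBs and, if it
 * holds a valid hardware context number, clear that bit in
 * mmu_context_bmap under ctx_alloc_lock.
 */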
void destroy_context(struct mm_struct *mm)
{
	unsigned long flags, i;

	for (i = 0; i < MM_NUM_TSBS; i++)
		tsb_destroy_one(&mm->context.tsb_block[i]);

	spin_lock_irqsave(&ctx_alloc_lock, flags);

	if (CTX_VALID(mm->context)) {
		unsigned long nr = CTX_NRBITS(mm->context);
		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
	}

	spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}