/* tsb.S: Sparc64 TSB table handling.
 *
 * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
 */


#include <asm/tsb.h>
#include <asm/hypervisor.h>
#include <asm/page.h>
#include <asm/cpudata.h>
#include <asm/mmu.h>

	.text
	.align	32

	/* Invoked from the TLB miss handler; we are in the
	 * MMU global registers and they are set up like
	 * this:
	 *
	 * %g1: TSB entry pointer
	 * %g2:	available temporary
	 * %g3:	FAULT_CODE_{D,I}TLB
	 * %g4:	available temporary
	 * %g5:	available temporary
	 * %g6: TAG TARGET
	 * %g7:	available temporary, will be loaded by us with
	 *      the physical address base of the linux page
	 *      tables for the current address space
	 */
tsb_miss_dtlb:
	mov		TLB_TAG_ACCESS, %g4
	ldxa		[%g4] ASI_DMMU, %g4
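	/* TAG_ACCESS holds (vaddr | context ID); the shift pair
	 * below clears the context ID bits, leaving the
	 * page-aligned virtual address.
	 */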
	srlx		%g4, PAGE_SHIFT, %g4
	ba,pt		%xcc, tsb_miss_page_table_walk
	 sllx		%g4, PAGE_SHIFT, %g4

tsb_miss_itlb:
	mov		TLB_TAG_ACCESS, %g4
	ldxa		[%g4] ASI_IMMU, %g4
	srlx		%g4, PAGE_SHIFT, %g4
	ba,pt		%xcc, tsb_miss_page_table_walk
	 sllx		%g4, PAGE_SHIFT, %g4

	/* At this point we have:
	 * %g1 --	PAGE_SIZE TSB entry address
	 * %g3 --	FAULT_CODE_{D,I}TLB
	 * %g4 --	missing virtual address
	 * %g6 --	TAG TARGET (vaddr >> 22)
	 */
tsb_miss_page_table_walk:
	TRAP_LOAD_TRAP_BLOCK(%g7, %g5)

	/* Before committing to a full page table walk,
	 * check the huge page TSB.
	 */
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)

661:	ldx		[%g7 + TRAP_PER_CPU_TSB_HUGE], %g5
	nop
	.section	.sun4v_2insn_patch, "ax"
	.word		661b
	mov		SCRATCHPAD_UTSBREG2, %g5
	ldxa		[%g5] ASI_SCRATCHPAD, %g5
	.previous

	cmp		%g5, -1
	be,pt		%xcc, 80f
	 nop

	/* We need an aligned pair of registers containing 2 values
	 * which can be easily rematerialized.  %g6 and %g7 foot the
	 * bill just nicely.  We'll save %g6 away into %g2 for the
	 * huge page TSB TAG comparison.
	 *
	 * Perform a huge page TSB lookup.
	 */
	mov		%g6, %g2
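	/* The low three bits of the TSB config value encode the
	 * size field (nentries = 512 << size); the remaining bits
	 * are the TSB base.  The entry index is
	 * (vaddr >> REAL_HPAGE_SHIFT) & (nentries - 1), with 16
	 * bytes per entry.
	 */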
	and		%g5, 0x7, %g6
	mov		512, %g7
	andn		%g5, 0x7, %g5
	sllx		%g7, %g6, %g7
	srlx		%g4, REAL_HPAGE_SHIFT, %g6
	sub		%g7, 1, %g7
	and		%g6, %g7, %g6
	sllx		%g6, 4, %g6
	add		%g5, %g6, %g5

	TSB_LOAD_QUAD(%g5, %g6)
	cmp		%g6, %g2
	be,a,pt		%xcc, tsb_tlb_reload
	 mov		%g7, %g5

	/* No match, remember the huge page TSB entry address,
	 * and restore %g6 and %g7.
	 */
	TRAP_LOAD_TRAP_BLOCK(%g7, %g6)
	srlx		%g4, 22, %g6
80:	stx		%g5, [%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP]

#endif

	ldx		[%g7 + TRAP_PER_CPU_PGD_PADDR], %g7

	/* At this point we have:
	 * %g1 --	TSB entry address
	 * %g3 --	FAULT_CODE_{D,I}TLB
	 * %g4 --	missing virtual address
	 * %g6 --	TAG TARGET (vaddr >> 22)
	 * %g7 --	page table physical address
	 *
	 * We know that both the base PAGE_SIZE TSB and the
	 * HPAGE_SIZE TSB lack a matching entry.
	 */
tsb_miss_page_table_walk_sun4v_fastpath:
	USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)

	/* Valid PTE is now in %g5.  */

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	sethi		%uhi(_PAGE_PMD_HUGE), %g7
	sllx		%g7, 32, %g7

	andcc		%g5, %g7, %g0
	be,pt		%xcc, 60f
	 nop

	/* It is a huge page, so use the huge page TSB entry address we
	 * calculated above.  If the huge page TSB has not been
	 * allocated, set up a trap stack and call hugetlb_setup()
	 * to do so, then return from the trap to replay the TLB
	 * miss.
	 *
	 * This is necessary to handle the case of transparent huge
	 * pages where we don't really have a non-atomic context
	 * in which to allocate the hugepage TSB hash table.  When
	 * the 'mm' faults in the hugepage for the first time, we
	 * thus handle it here.  This also makes sure that we can
	 * allocate the TSB hash table on the correct NUMA node.
	 */
	TRAP_LOAD_TRAP_BLOCK(%g7, %g2)
	ldx		[%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP], %g1
	cmp		%g1, -1
	bne,pt		%xcc, 60f
	 nop

661:	rdpr		%pstate, %g5
	wrpr		%g5, PSTATE_AG | PSTATE_MG, %pstate
	.section	.sun4v_2insn_patch, "ax"
	.word		661b
	SET_GL(1)
	nop
	.previous

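	/* If the miss was taken at TL > 1 we cannot build a trap
	 * frame here; redirect the trap through the window fixup
	 * path instead.
	 */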
	rdpr	%tl, %g7
	cmp	%g7, 1
	bne,pn	%xcc, winfix_trampoline
	 mov	%g3, %g4
	ba,pt	%xcc, etrap
	 rd	%pc, %g7
	call	hugetlb_setup
	 add	%sp, PTREGS_OFF, %o0
	ba,pt	%xcc, rtrap
	 nop

60:
#endif

	/* At this point we have:
	 * %g1 --	TSB entry address
	 * %g3 --	FAULT_CODE_{D,I}TLB
	 * %g5 --	valid PTE
	 * %g6 --	TAG TARGET (vaddr >> 22)
	 */
tsb_reload:
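	/* Lock the TSB entry by setting the lock bit in its tag,
	 * then store the new TTE and tag; writing the tag also
	 * releases the lock.
	 */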
	TSB_LOCK_TAG(%g1, %g2, %g7)
	TSB_WRITE(%g1, %g5, %g6)

	/* Finally, load TLB and return from trap.  */
tsb_tlb_reload:
	cmp		%g3, FAULT_CODE_DTLB
	bne,pn		%xcc, tsb_itlb_load
	 nop

tsb_dtlb_load:

661:	stxa		%g5, [%g0] ASI_DTLB_DATA_IN
	retry
	.section	.sun4v_2insn_patch, "ax"
	.word		661b
	nop
	nop
	.previous

	/* For sun4v the ASI_DTLB_DATA_IN store and the retry
	 * instruction get nop'd out and we get here to branch
	 * to the sun4v tlb load code.  The registers are set up
	 * as follows:
	 *
	 * %g4: vaddr
	 * %g5: PTE
	 * %g6:	TAG
	 *
	 * The sun4v TLB load wants the PTE in %g3 so we fix that
	 * up here.
	 */
	ba,pt		%xcc, sun4v_dtlb_load
	 mov		%g5, %g3

tsb_itlb_load:
	/* Executable bit must be set.  */
661:	sethi		%hi(_PAGE_EXEC_4U), %g4
	andcc		%g5, %g4, %g0
	.section	.sun4v_2insn_patch, "ax"
	.word		661b
	andcc		%g5, _PAGE_EXEC_4V, %g0
	nop
	.previous

	be,pn		%xcc, tsb_do_fault
	 nop

661:	stxa		%g5, [%g0] ASI_ITLB_DATA_IN
	retry
	.section	.sun4v_2insn_patch, "ax"
	.word		661b
	nop
	nop
	.previous

	/* For sun4v the ASI_ITLB_DATA_IN store and the retry
	 * instruction get nop'd out and we get here to branch
	 * to the sun4v tlb load code.  The registers are set up
	 * as follows:
	 *
	 * %g4: vaddr
	 * %g5: PTE
	 * %g6:	TAG
	 *
	 * The sun4v TLB load wants the PTE in %g3 so we fix that
	 * up here.
	 */
	ba,pt		%xcc, sun4v_itlb_load
	 mov		%g5, %g3

	/* No valid entry in the page tables; do full fault
	 * processing.
	 */

	.globl		tsb_do_fault
tsb_do_fault:
	cmp		%g3, FAULT_CODE_DTLB

661:	rdpr		%pstate, %g5
	wrpr		%g5, PSTATE_AG | PSTATE_MG, %pstate
	.section	.sun4v_2insn_patch, "ax"
	.word		661b
	SET_GL(1)
	ldxa		[%g0] ASI_SCRATCHPAD, %g4	! %g4 = sun4v MMU fault status area
	.previous

	bne,pn		%xcc, tsb_do_itlb_fault
	 nop

tsb_do_dtlb_fault:
	rdpr	%tl, %g3
	cmp	%g3, 1

661:	mov	TLB_TAG_ACCESS, %g4
	ldxa	[%g4] ASI_DMMU, %g5
	.section .sun4v_2insn_patch, "ax"
	.word	661b
	ldx	[%g4 + HV_FAULT_D_ADDR_OFFSET], %g5
	nop
	.previous

	/* Clear context ID bits.  */
	srlx		%g5, PAGE_SHIFT, %g5
	sllx		%g5, PAGE_SHIFT, %g5

	be,pt	%xcc, sparc64_realfault_common
	 mov	FAULT_CODE_DTLB, %g4
	ba,pt	%xcc, winfix_trampoline
	 nop

tsb_do_itlb_fault:
	rdpr	%tpc, %g5
	ba,pt	%xcc, sparc64_realfault_common
	 mov	FAULT_CODE_ITLB, %g4

	.globl	sparc64_realfault_common
sparc64_realfault_common:
	/* fault code in %g4, fault address in %g5, etrap will
	 * preserve these two values in %l4 and %l5 respectively
	 */
	ba,pt	%xcc, etrap			! Save trap state
1:	 rd	%pc, %g7			! ...
	stb	%l4, [%g6 + TI_FAULT_CODE]	! Save fault code
	stx	%l5, [%g6 + TI_FAULT_ADDR]	! Save fault address
	call	do_sparc64_fault		! Call fault handler
	 add	%sp, PTREGS_OFF, %o0		! Compute pt_regs arg
	ba,pt	%xcc, rtrap			! Restore cpu state
	 nop					! Delay slot (fill me)

winfix_trampoline:
	rdpr	%tpc, %g3			! Prepare winfixup TNPC
	or	%g3, 0x7c, %g3			! Compute branch offset
	wrpr	%g3, %tnpc			! Write it into TNPC
	done					! Trap return

	/* Insert an entry into the TSB.
	 *
	 * %o0: TSB entry pointer (virt or phys address)
	 * %o1: tag
	 * %o2:	pte
	 */
	.align	32
	.globl	__tsb_insert
__tsb_insert:
	rdpr	%pstate, %o5
	wrpr	%o5, PSTATE_IE, %pstate	! Disable interrupts
	TSB_LOCK_TAG(%o0, %g2, %g3)
	TSB_WRITE(%o0, %o2, %o1)
	wrpr	%o5, %pstate		! Restore interrupt state
	retl
	 nop
	.size	__tsb_insert, .-__tsb_insert

	/* Flush the given TSB entry if it has the matching
	 * tag.
	 *
	 * %o0: TSB entry pointer (virt or phys address)
	 * %o1:	tag
	 */
	.align	32
	.globl	tsb_flush
	.type	tsb_flush,#function
tsb_flush:
	sethi	%hi(TSB_TAG_LOCK_HIGH), %g2
1:	TSB_LOAD_TAG(%o0, %g1)
	srlx	%g1, 32, %o3
	andcc	%o3, %g2, %g0
	bne,pn	%icc, 1b
	 nop
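	/* Lock bit is clear; if the tag matches, CAS the invalid
	 * bit into the tag, retrying the load if the tag changed
	 * under us.
	 */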
	cmp	%g1, %o1
	mov	1, %o3
	bne,pt	%xcc, 2f
	 sllx	%o3, TSB_TAG_INVALID_BIT, %o3
	TSB_CAS_TAG(%o0, %g1, %o3)
	cmp	%g1, %o3
	bne,pn	%xcc, 1b
	 nop
2:	retl
	 nop
	.size	tsb_flush, .-tsb_flush

	/* Reload MMU-related context switch state at
	 * schedule() time.
	 *
	 * %o0: page table physical address
	 * %o1:	TSB base config pointer
	 * %o2:	TSB huge config pointer, or NULL if none
	 * %o3:	Hypervisor TSB descriptor physical address
	 * %o4: Secondary context to load, if non-zero
	 *
	 * We have to run this whole thing with interrupts
	 * disabled so that the current cpu doesn't change
	 * due to preemption.
	 */
	.align	32
	.globl	__tsb_context_switch
	.type	__tsb_context_switch,#function
__tsb_context_switch:
	rdpr	%pstate, %g1
	wrpr	%g1, PSTATE_IE, %pstate

	brz,pn	%o4, 1f
	 mov	SECONDARY_CONTEXT, %o5

661:	stxa	%o4, [%o5] ASI_DMMU
	.section .sun4v_1insn_patch, "ax"
	.word	661b
	stxa	%o4, [%o5] ASI_MMU
	.previous
	flush	%g6

1:
	TRAP_LOAD_TRAP_BLOCK(%g2, %g3)

	stx	%o0, [%g2 + TRAP_PER_CPU_PGD_PADDR]

	ldx	[%o1 + TSB_CONFIG_REG_VAL], %o0
	brz,pt	%o2, 1f
	 mov	-1, %g3

	ldx	[%o2 + TSB_CONFIG_REG_VAL], %g3

1:	stx	%g3, [%g2 + TRAP_PER_CPU_TSB_HUGE]

	sethi	%hi(tlb_type), %g2
	lduw	[%g2 + %lo(tlb_type)], %g2
	cmp	%g2, 3
	bne,pt	%icc, 50f
	 nop

	/* Hypervisor TSB switch. */
	mov	SCRATCHPAD_UTSBREG1, %o5
	stxa	%o0, [%o5] ASI_SCRATCHPAD
	mov	SCRATCHPAD_UTSBREG2, %o5
	stxa	%g3, [%o5] ASI_SCRATCHPAD

	mov	2, %o0			! Assume two TSB descriptions
	cmp	%g3, -1			! No huge page TSB?
	move	%xcc, 1, %o0		! Then register only one

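	/* mmu_tsb_ctxnon0: %o0 = number of TSB descriptions,
	 * %o1 = real address of the TSB description array.
	 */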
	mov	HV_FAST_MMU_TSB_CTXNON0, %o5
	mov	%o3, %o1
	ta	HV_FAST_TRAP

	/* Finish up.  */
	ba,pt	%xcc, 9f
	 nop

	/* SUN4U TSB switch.  */
50:	mov	TSB_REG, %o5
	stxa	%o0, [%o5] ASI_DMMU
	membar	#Sync
	stxa	%o0, [%o5] ASI_IMMU
	membar	#Sync

2:	ldx	[%o1 + TSB_CONFIG_MAP_VADDR], %o4
	brz	%o4, 9f
	 ldx	[%o1 + TSB_CONFIG_MAP_PTE], %o5

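	/* Map the base TSB itself: write its vaddr into TAG_ACCESS
	 * and install the mapping PTE in the highest unlocked
	 * D-TLB entry (the << 3 converts the entry index into an
	 * ASI_DTLB_DATA_ACCESS address).
	 */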
	sethi	%hi(sparc64_highest_unlocked_tlb_ent), %g2
	mov	TLB_TAG_ACCESS, %g3
	lduw	[%g2 + %lo(sparc64_highest_unlocked_tlb_ent)], %g2
	stxa	%o4, [%g3] ASI_DMMU
	membar	#Sync
	sllx	%g2, 3, %g2
	stxa	%o5, [%g2] ASI_DTLB_DATA_ACCESS
	membar	#Sync

	brz,pt	%o2, 9f
	 nop

	ldx	[%o2 + TSB_CONFIG_MAP_VADDR], %o4
	ldx	[%o2 + TSB_CONFIG_MAP_PTE], %o5
	mov	TLB_TAG_ACCESS, %g3
	stxa	%o4, [%g3] ASI_DMMU
	membar	#Sync
	sub	%g2, (1 << 3), %g2
	stxa	%o5, [%g2] ASI_DTLB_DATA_ACCESS
	membar	#Sync

9:
	wrpr	%g1, %pstate

	retl
	 nop
	.size	__tsb_context_switch, .-__tsb_context_switch

#define TSB_PASS_BITS	((1 << TSB_TAG_LOCK_BIT) | \
			 (1 << TSB_TAG_INVALID_BIT))

	.align	32
	.globl	copy_tsb
	.type	copy_tsb,#function
copy_tsb:		/* %o0=old_tsb_base, %o1=old_tsb_size
			 * %o2=new_tsb_base, %o3=new_tsb_size
			 * %o4=page_size_shift
			 */
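	/* Walk the old TSB one 16-byte entry at a time, skipping
	 * locked and invalid entries; for each valid entry, rebuild
	 * the vaddr from the tag and the old index, then rehash it
	 * into the new TSB.
	 */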
	sethi		%uhi(TSB_PASS_BITS), %g7
	srlx		%o3, 4, %o3
	add		%o0, %o1, %o1	/* end of old tsb */
	sllx		%g7, 32, %g7
	sub		%o3, 1, %o3	/* %o3 == new tsb hash mask */

	mov		%o4, %g1	/* page_size_shift */

661:	prefetcha	[%o0] ASI_N, #one_read
	.section	.tsb_phys_patch, "ax"
	.word		661b
	prefetcha	[%o0] ASI_PHYS_USE_EC, #one_read
	.previous

90:	andcc		%o0, (64 - 1), %g0	! At a cacheline boundary?
	bne		1f
	 add		%o0, 64, %o5		! %o5 = next cacheline

661:	prefetcha	[%o5] ASI_N, #one_read
	.section	.tsb_phys_patch, "ax"
	.word		661b
	prefetcha	[%o5] ASI_PHYS_USE_EC, #one_read
	.previous

1:	TSB_LOAD_QUAD(%o0, %g2)		/* %g2/%g3 == TSB entry */
	andcc		%g2, %g7, %g0	/* LOCK or INVALID set? */
	bne,pn		%xcc, 80f	/* Skip it */
	 sllx		%g2, 22, %o4	/* TAG --> VADDR */

	/* This can definitely be computed faster... */
	srlx		%o0, 4, %o5	/* Build index */
	and		%o5, 511, %o5	/* Mask index */
	sllx		%o5, %g1, %o5	/* Put into vaddr position */
	or		%o4, %o5, %o4	/* Full VADDR. */
	srlx		%o4, %g1, %o4	/* Shift down to create index */
	and		%o4, %o3, %o4	/* Mask with new_tsb_nents-1 */
	sllx		%o4, 4, %o4	/* Shift back up into tsb ent offset */
	TSB_STORE(%o2 + %o4, %g2)	/* Store TAG */
	add		%o4, 0x8, %o4	/* Advance to TTE */
	TSB_STORE(%o2 + %o4, %g3)	/* Store TTE */

80:	add		%o0, 16, %o0
	cmp		%o0, %o1
	bne,pt		%xcc, 90b
	 nop

	retl
	 nop
	.size		copy_tsb, .-copy_tsb

	/* Set the invalid bit in all TSB entries.  */
	.align		32
	.globl		tsb_init
	.type		tsb_init,#function
tsb_init:		/* %o0 = TSB vaddr, %o1 = size in bytes */
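	/* Write the invalid tag into all sixteen entries of each
	 * 0x100-byte block, prefetching ahead of the stores.
	 */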
	prefetch	[%o0 + 0x000], #n_writes
	mov		1, %g1
	prefetch	[%o0 + 0x040], #n_writes
	sllx		%g1, TSB_TAG_INVALID_BIT, %g1
	prefetch	[%o0 + 0x080], #n_writes
1:	prefetch	[%o0 + 0x0c0], #n_writes
	stx		%g1, [%o0 + 0x00]
	stx		%g1, [%o0 + 0x10]
	stx		%g1, [%o0 + 0x20]
	stx		%g1, [%o0 + 0x30]
	prefetch	[%o0 + 0x100], #n_writes
	stx		%g1, [%o0 + 0x40]
	stx		%g1, [%o0 + 0x50]
	stx		%g1, [%o0 + 0x60]
	stx		%g1, [%o0 + 0x70]
	prefetch	[%o0 + 0x140], #n_writes
	stx		%g1, [%o0 + 0x80]
	stx		%g1, [%o0 + 0x90]
	stx		%g1, [%o0 + 0xa0]
	stx		%g1, [%o0 + 0xb0]
	prefetch	[%o0 + 0x180], #n_writes
	stx		%g1, [%o0 + 0xc0]
	stx		%g1, [%o0 + 0xd0]
	stx		%g1, [%o0 + 0xe0]
	stx		%g1, [%o0 + 0xf0]
	subcc		%o1, 0x100, %o1
	bne,pt		%xcc, 1b
	 add		%o0, 0x100, %o0
	retl
	 nop
	nop
	nop
	.size		tsb_init, .-tsb_init

	.globl		NGtsb_init
	.type		NGtsb_init,#function
NGtsb_init:
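	/* Niagara variant: block-init stores avoid reading each
	 * cache line from memory before it is overwritten.
	 */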
	rd		%asi, %g2
	mov		1, %g1
	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
	sllx		%g1, TSB_TAG_INVALID_BIT, %g1
1:	stxa		%g1, [%o0 + 0x00] %asi
	stxa		%g1, [%o0 + 0x10] %asi
	stxa		%g1, [%o0 + 0x20] %asi
	stxa		%g1, [%o0 + 0x30] %asi
	stxa		%g1, [%o0 + 0x40] %asi
	stxa		%g1, [%o0 + 0x50] %asi
	stxa		%g1, [%o0 + 0x60] %asi
	stxa		%g1, [%o0 + 0x70] %asi
	stxa		%g1, [%o0 + 0x80] %asi
	stxa		%g1, [%o0 + 0x90] %asi
	stxa		%g1, [%o0 + 0xa0] %asi
	stxa		%g1, [%o0 + 0xb0] %asi
	stxa		%g1, [%o0 + 0xc0] %asi
	stxa		%g1, [%o0 + 0xd0] %asi
	stxa		%g1, [%o0 + 0xe0] %asi
	stxa		%g1, [%o0 + 0xf0] %asi
	subcc		%o1, 0x100, %o1
	bne,pt		%xcc, 1b
	 add		%o0, 0x100, %o0
	membar		#Sync
	retl
	 wr		%g2, 0x0, %asi
	.size		NGtsb_init, .-NGtsb_init