tlbex.S 11.8 KB
Newer Older
V
Vineet Gupta 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
/*
 * TLB Exception Handling for ARC
 *
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Vineetg: April 2011 :
 *  -MMU v1: moved out legacy code into a seperate file
 *  -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
 *      helps avoid a shift when preparing PD0 from PTE
 *
 * Vineetg: July 2009
 *  -For MMU V2, we need not do heuristics at the time of commiting a D-TLB
 *   entry, so that it doesn't knock out it's I-TLB entry
 *  -Some more fine tuning:
 *   bmsk instead of add, asl.cc instead of branch, delay slot utilise etc
 *
 * Vineetg: July 2009
 *  -Practically rewrote the I/D TLB Miss handlers
 *   Now 40 and 135 instructions a peice as compared to 131 and 449 resp.
 *   Hence Leaner by 1.5 K
 *   Used Conditional arithmetic to replace excessive branching
 *   Also used short instructions wherever possible
 *
 * Vineetg: Aug 13th 2008
 *  -Passing ECR (Exception Cause REG) to do_page_fault( ) for printing
 *   more information in case of a Fatality
 *
 * Vineetg: March 25th Bug #92690
 *  -Added Debug Code to check if sw-ASID == hw-ASID

 * Rahul Trivedi, Amit Bhor: Codito Technologies 2004
 */

	.cpu A7

#include <linux/linkage.h>
#include <asm/entry.h>
V
Vineet Gupta 已提交
42
#include <asm/mmu.h>
V
Vineet Gupta 已提交
43 44 45 46 47 48
#include <asm/pgtable.h>
#include <asm/arcregs.h>
#include <asm/cache.h>
#include <asm/processor.h>
#include <asm/tlb-mmu1.h>

49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
;-----------------------------------------------------------------
; ARC700 Exception Handling doesn't auto-switch stack and it only provides
; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
;
; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a
; "global" is used to free-up FIRST core reg to be able to code the rest of
; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe).
; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3
; need to be saved as well by extending the "global" to be 4 words. Hence
;	".size   ex_saved_reg1, 16"
; [All of this dance is to avoid stack switching for each TLB Miss, since we
; only need to save only a handful of regs, as opposed to complete reg file]
;
; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST
; core reg as it will not be SMP safe.
; Thus scratch AUX reg is used (and no longer used to cache task PGD).
; To save the rest of 3 regs - per cpu, the global is made "per-cpu".
; Epilogue thus has to locate the "per-cpu" storage for regs.
; To avoid cache line bouncing the per-cpu global is aligned/sized per
; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence
;	".size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)"

; As simple as that....
V
Vineet Gupta 已提交
72 73
;--------------------------------------------------------------------------

74
; scratch memory to save [r0-r3] used to code TLB refill Handler
75
ARCFP_DATA ex_saved_reg1
76
	.align 1 << L1_CACHE_SHIFT
V
Vineet Gupta 已提交
77
	.type   ex_saved_reg1, @object
V
Vineet Gupta 已提交
78 79 80 81 82
#ifdef CONFIG_SMP
	.size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)
ex_saved_reg1:
	.zero (CONFIG_NR_CPUS << L1_CACHE_SHIFT)
#else
V
Vineet Gupta 已提交
83 84 85
	.size   ex_saved_reg1, 16
ex_saved_reg1:
	.zero 16
V
Vineet Gupta 已提交
86
#endif
V
Vineet Gupta 已提交
87

88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
.macro TLBMISS_FREEUP_REGS
#ifdef CONFIG_SMP
	sr  r0, [ARC_REG_SCRATCH_DATA0]	; freeup r0 to code with
	GET_CPU_ID  r0			; get to per cpu scratch mem,
	lsl r0, r0, L1_CACHE_SHIFT	; cache line wide per cpu
	add r0, @ex_saved_reg1, r0
#else
	st    r0, [@ex_saved_reg1]
	mov_s r0, @ex_saved_reg1
#endif
	st_s  r1, [r0, 4]
	st_s  r2, [r0, 8]
	st_s  r3, [r0, 12]

	; VERIFY if the ASID in MMU-PID Reg is same as
	; one in Linux data structures

	DBG_ASID_MISMATCH
.endm

.macro TLBMISS_RESTORE_REGS
#ifdef CONFIG_SMP
	GET_CPU_ID  r0			; get to per cpu scratch mem
	lsl r0, r0, L1_CACHE_SHIFT	; each is cache line wide
	add r0, @ex_saved_reg1, r0
	ld_s  r3, [r0,12]
	ld_s  r2, [r0, 8]
	ld_s  r1, [r0, 4]
	lr    r0, [ARC_REG_SCRATCH_DATA0]
#else
	mov_s r0, @ex_saved_reg1
	ld_s  r3, [r0,12]
	ld_s  r2, [r0, 8]
	ld_s  r1, [r0, 4]
	ld_s  r0, [r0]
#endif
.endm

V
Vineet Gupta 已提交
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
;============================================================================
;  Troubleshooting Stuff
;============================================================================

; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid
; When Creating TLB Entries, instead of doing 3 dependent loads from memory,
; we use the MMU PID Reg to get current ASID.
; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble.
; So we try to detect this in TLB Mis shandler


.macro DBG_ASID_MISMATCH

#ifdef CONFIG_ARC_DBG_TLB_PARANOIA

	; make sure h/w ASID is same as s/w ASID

	GET_CURR_TASK_ON_CPU  r3
	ld r0, [r3, TASK_ACT_MM]
	ld r0, [r0, MM_CTXT+MM_CTXT_ASID]

	lr r1, [ARC_REG_PID]
	and r1, r1, 0xFF
	breq r1, r0, 5f

	; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
	lr  r0, [erstatus]
	bbit0 r0, STATUS_U_BIT, 5f

	; We sure are in troubled waters, Flag the error, but to do so
	; need to switch to kernel mode stack to call error routine
	GET_TSK_STACK_BASE   r3, sp

	; Call printk to shoutout aloud
	mov r0, 1
	j print_asid_mismatch

5:   ; ASIDs match so proceed normally
	nop

#endif

.endm

;============================================================================
;TLB Miss handling Code
;============================================================================

;-----------------------------------------------------------------------------
; This macro does the page-table lookup for the faulting address.
; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address
.macro LOAD_FAULT_PTE

	lr  r2, [efa]

V
Vineet Gupta 已提交
181
#ifndef CONFIG_SMP
V
Vineet Gupta 已提交
182
	lr  r1, [ARC_REG_SCRATCH_DATA0] ; current pgd
V
Vineet Gupta 已提交
183 184 185 186 187
#else
	GET_CURR_TASK_ON_CPU  r1
	ld  r1, [r1, TASK_ACT_MM]
	ld  r1, [r1, MM_PGD]
#endif
V
Vineet Gupta 已提交
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203

	lsr     r0, r2, PGDIR_SHIFT     ; Bits for indexing into PGD
	ld.as   r1, [r1, r0]            ; PGD entry corresp to faulting addr
	and.f   r1, r1, PAGE_MASK       ; Ignoring protection and other flags
	;   contains Ptr to Page Table
	bz.d    do_slow_path_pf         ; if no Page Table, do page fault

	; Get the PTE entry: The idea is
	; (1) x = addr >> PAGE_SHIFT 	-> masks page-off bits from @fault-addr
	; (2) y = x & (PTRS_PER_PTE - 1) -> to get index
	; (3) z = pgtbl[y]
	; To avoid the multiply by in end, we do the -2, <<2 below

	lsr     r0, r2, (PAGE_SHIFT - 2)
	and     r0, r0, ( (PTRS_PER_PTE - 1) << 2)
	ld.aw   r0, [r1, r0]            ; get PTE and PTE ptr for fault addr
V
Vineet Gupta 已提交
204 205 206
#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
	and.f 0, r0, _PAGE_PRESENT
	bz   1f
207 208 209
	ld   r3, [num_pte_not_present]
	add  r3, r3, 1
	st   r3, [num_pte_not_present]
V
Vineet Gupta 已提交
210 211
1:
#endif
V
Vineet Gupta 已提交
212 213 214 215 216 217 218 219 220

.endm

;-----------------------------------------------------------------
; Convert Linux PTE entry into TLB entry
; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu
; IN: r0 = PTE, r1 = ptr to PTE

.macro CONV_PTE_TO_TLB
221 222 223 224 225 226 227 228 229
	and    r3, r0, PTE_BITS_RWX	;       r w x
	lsl    r2, r3, 3		; r w x 0 0 0
	and.f  0,  r0, _PAGE_GLOBAL
	or.z   r2, r2, r3		; r w x r w x

	and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE
	or  r3, r3, r2

	sr  r3, [ARC_REG_TLBPD1]    	; these go in PD1
V
Vineet Gupta 已提交
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255

	and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb

	lr  r3,[ARC_REG_TLBPD0]     ; MMU prepares PD0 with vaddr and asid

	or  r3, r3, r2              ; S | vaddr | {sasid|asid}
	sr  r3,[ARC_REG_TLBPD0]     ; rewrite PD0
.endm

;-----------------------------------------------------------------
; Commit the TLB entry into MMU

.macro COMMIT_ENTRY_TO_MMU

	/* Get free TLB slot: Set = computed from vaddr, way = random */
	sr  TLBGetIndex, [ARC_REG_TLBCOMMAND]

	/* Commit the Write */
#if (CONFIG_ARC_MMU_VER >= 2)   /* introduced in v2 */
	sr TLBWriteNI, [ARC_REG_TLBCOMMAND]
#else
	sr TLBWrite, [ARC_REG_TLBCOMMAND]
#endif
.endm


256
ARCFP_CODE	;Fast Path Code, candidate for ICCM
V
Vineet Gupta 已提交
257 258 259 260 261 262 263 264 265

;-----------------------------------------------------------------------------
; I-TLB Miss Exception Handler
;-----------------------------------------------------------------------------

ARC_ENTRY EV_TLBMissI

	TLBMISS_FREEUP_REGS

V
Vineet Gupta 已提交
266 267 268 269 270 271
#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
	ld  r0, [@numitlb]
	add r0, r0, 1
	st  r0, [@numitlb]
#endif

V
Vineet Gupta 已提交
272
	;----------------------------------------------------------------
273
	; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA
V
Vineet Gupta 已提交
274 275 276 277 278
	LOAD_FAULT_PTE

	;----------------------------------------------------------------
	; VERIFY_PTE: Check if PTE permissions approp for executing code
	cmp_s   r2, VMALLOC_START
279 280
	mov_s   r2, (_PAGE_PRESENT | _PAGE_EXECUTE)
	or.hs   r2, r2, _PAGE_GLOBAL
V
Vineet Gupta 已提交
281 282 283 284 285 286

	and     r3, r0, r2  ; Mask out NON Flag bits from PTE
	xor.f   r3, r3, r2  ; check ( ( pte & flags_test ) == flags_test )
	bnz     do_slow_path_pf

	; Let Linux VM know that the page was accessed
287 288
	or      r0, r0, _PAGE_ACCESSED  ; set Accessed Bit
	st_s    r0, [r1]                ; Write back PTE
V
Vineet Gupta 已提交
289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304

	CONV_PTE_TO_TLB
	COMMIT_ENTRY_TO_MMU
	TLBMISS_RESTORE_REGS
	rtie

ARC_EXIT EV_TLBMissI

;-----------------------------------------------------------------------------
; D-TLB Miss Exception Handler
;-----------------------------------------------------------------------------

ARC_ENTRY EV_TLBMissD

	TLBMISS_FREEUP_REGS

V
Vineet Gupta 已提交
305 306 307 308 309 310
#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
	ld  r0, [@numdtlb]
	add r0, r0, 1
	st  r0, [@numdtlb]
#endif

V
Vineet Gupta 已提交
311 312
	;----------------------------------------------------------------
	; Get the PTE corresponding to V-addr accessed
313
	; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA
V
Vineet Gupta 已提交
314 315 316 317 318
	LOAD_FAULT_PTE

	;----------------------------------------------------------------
	; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W)

319 320 321 322 323 324 325 326 327
	cmp_s	r2, VMALLOC_START
	mov_s   r2, _PAGE_PRESENT	; common bit for K/U PTE
	or.hs	r2, r2, _PAGE_GLOBAL	; kernel PTE only

	; Linux PTE [RWX] bits are semantically overloaded:
	; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc)
	; -Otherwise they are user-mode permissions, and those are exactly
	;  same for kernel mode as well (e.g. copy_(to|from)_user)

V
Vineet Gupta 已提交
328 329
	lr      r3, [ecr]
	btst_s  r3, ECR_C_BIT_DTLB_LD_MISS	; Read Access
330
	or.nz   r2, r2, _PAGE_READ      	; chk for Read flag in PTE
V
Vineet Gupta 已提交
331
	btst_s  r3, ECR_C_BIT_DTLB_ST_MISS	; Write Access
332 333
	or.nz   r2, r2, _PAGE_WRITE     	; chk for Write flag in PTE
	; Above laddering takes care of XCHG access (both R and W)
V
Vineet Gupta 已提交
334 335 336 337 338 339 340 341

	; By now, r2 setup with all the Flags we need to check in PTE
	and     r3, r0, r2              ; Mask out NON Flag bits from PTE
	brne.d  r3, r2, do_slow_path_pf ; is ((pte & flags_test) == flags_test)

	;----------------------------------------------------------------
	; UPDATE_PTE: Let Linux VM know that page was accessed/dirty
	lr      r3, [ecr]
342
	or      r0, r0, _PAGE_ACCESSED        ; Accessed bit always
V
Vineet Gupta 已提交
343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367
	btst_s  r3,  ECR_C_BIT_DTLB_ST_MISS   ; See if it was a Write Access ?
	or.nz   r0, r0, _PAGE_MODIFIED        ; if Write, set Dirty bit as well
	st_s    r0, [r1]                      ; Write back PTE

	CONV_PTE_TO_TLB

#if (CONFIG_ARC_MMU_VER == 1)
	; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of
	; memcpy where 3 parties contend for 2 ways, ensuing a livelock.
	; But only for old MMU or one with Metal Fix
	TLB_WRITE_HEURISTICS
#endif

	COMMIT_ENTRY_TO_MMU
	TLBMISS_RESTORE_REGS
	rtie

;-------- Common routine to call Linux Page Fault Handler -----------
do_slow_path_pf:

	; Restore the 4-scratch regs saved by fast path miss handler
	TLBMISS_RESTORE_REGS

	; Slow path TLB Miss handled as a regular ARC Exception
	; (stack switching / save the complete reg-file).
368
	EXCEPTION_PROLOGUE
V
Vineet Gupta 已提交
369 370 371

	; ------- setup args for Linux Page fault Hanlder ---------
	mov_s r0, sp
372
	lr  r1, [efa]
V
Vineet Gupta 已提交
373 374 375 376 377 378 379 380 381 382 383 384 385

	; We don't want exceptions to be disabled while the fault is handled.
	; Now that we have saved the context we return from exception hence
	; exceptions get re-enable

	FAKE_RET_FROM_EXCPN  r9

	bl  do_page_fault
	b   ret_from_exception

ARC_EXIT EV_TLBMissD

ARC_ENTRY EV_TLBMissB   ; Bogus entry to measure sz of DTLBMiss hdlr