/*
 * Page fault handler for SH with an MMU.
 *
 *  Copyright (C) 1999  Niibe Yutaka
 *  Copyright (C) 2003 - 2009  Paul Mundt
 *
 *  Based on linux/arch/i386/mm/fault.c:
 *   Copyright (C) 1995  Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <asm/io_trapped.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>

/*
 * Give kprobes a chance to claim this fault before the normal fault
 * handling runs.
 *
 * Returns 1 when a registered kprobe fault handler consumed the fault
 * (the caller must then return without further processing), 0 otherwise.
 */
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
	int ret = 0;

	/*
	 * kprobes only ever claim faults raised from kernel mode;
	 * user-space faults always take the regular path.
	 */
	if (kprobes_built_in() && !user_mode(regs)) {
		/* kprobe_running() requires preemption to be disabled. */
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, trap))
			ret = 1;
		preempt_enable();
	}

	return ret;
}

P
Paul Mundt 已提交
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;

	if (!pmd_present(*pmd))
		set_pmd(pmd, *pmd_k);
63 64 65 66 67 68
	else {
		/*
		 * The page tables are fully synchronised so there must
		 * be another reason for the fault. Return NULL here to
		 * signal that we have not taken care of the fault.
		 */
P
Paul Mundt 已提交
69
		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
70 71
		return NULL;
	}
P
Paul Mundt 已提交
72 73 74 75 76 77 78 79 80 81 82 83 84

	return pmd_k;
}

/*
 * Handle a fault on the vmalloc or module mapping area
 */
static noinline int vmalloc_fault(unsigned long address)
{
	pgd_t *pgd_k;
	pmd_t *pmd_k;
	pte_t *pte_k;

85 86
	/* Make sure we are in vmalloc/module/P3 area: */
	if (!(address >= VMALLOC_START && address < P3_ADDR_MAX))
P
Paul Mundt 已提交
87 88 89 90 91 92 93 94 95 96
		return -1;

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_k = get_TTB();
97
	pmd_k = vmalloc_sync_one(pgd_k, address);
P
Paul Mundt 已提交
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
	if (!pmd_k)
		return -1;

	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;

	return 0;
}

/* Non-zero when @address lies at or above the user address-space limit. */
static int fault_in_kernel_space(unsigned long address)
{
	if (address < TASK_SIZE)
		return 0;

	return 1;
}

L
Linus Torvalds 已提交
113 114 115 116 117
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
118 119 120
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long writeaccess,
					unsigned long address)
L
Linus Torvalds 已提交
121
{
P
Paul Mundt 已提交
122
	unsigned long vec;
L
Linus Torvalds 已提交
123 124 125
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct * vma;
126
	int si_code;
N
Nick Piggin 已提交
127
	int fault;
128
	siginfo_t info;
L
Linus Torvalds 已提交
129 130

	tsk = current;
P
Paul Mundt 已提交
131
	mm = tsk->mm;
132
	si_code = SEGV_MAPERR;
P
Paul Mundt 已提交
133
	vec = lookup_exception_vector();
L
Linus Torvalds 已提交
134

P
Paul Mundt 已提交
135 136 137 138 139 140 141 142 143 144 145
	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(fault_in_kernel_space(address))) {
		if (vmalloc_fault(address) >= 0)
S
Stuart Menefy 已提交
146
			return;
P
Paul Mundt 已提交
147
		if (notify_page_fault(regs, vec))
S
Stuart Menefy 已提交
148
			return;
S
Stuart Menefy 已提交
149

P
Paul Mundt 已提交
150
		goto bad_area_nosemaphore;
S
Stuart Menefy 已提交
151 152
	}

P
Paul Mundt 已提交
153
	if (unlikely(notify_page_fault(regs, vec)))
154 155
		return;

156
	/* Only enable interrupts if they were on before the fault */
157
	if ((regs->sr & SR_IMASK) != SR_IMASK)
158 159
		local_irq_enable();

160
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
161

L
Linus Torvalds 已提交
162
	/*
P
Paul Mundt 已提交
163 164
	 * If we're in an interrupt, have no user context or are running
	 * in an atomic region then we must not take the fault:
L
Linus Torvalds 已提交
165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
	 */
	if (in_atomic() || !mm)
		goto no_context;

	down_read(&mm->mmap_sem);

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;
P
Paul Mundt 已提交
180 181 182 183 184

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
L
Linus Torvalds 已提交
185
good_area:
186
	si_code = SEGV_ACCERR;
L
Linus Torvalds 已提交
187 188 189 190
	if (writeaccess) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
191
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
L
Linus Torvalds 已提交
192 193 194 195 196 197 198 199 200
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
survive:
201
	fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
N
Nick Piggin 已提交
202 203
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
L
Linus Torvalds 已提交
204
			goto out_of_memory;
N
Nick Piggin 已提交
205 206 207
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
L
Linus Torvalds 已提交
208
	}
209
	if (fault & VM_FAULT_MAJOR) {
N
Nick Piggin 已提交
210
		tsk->maj_flt++;
211
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
212 213
				     regs, address);
	} else {
N
Nick Piggin 已提交
214
		tsk->min_flt++;
215
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
216 217
				     regs, address);
	}
L
Linus Torvalds 已提交
218 219 220 221

	up_read(&mm->mmap_sem);
	return;

P
Paul Mundt 已提交
222 223 224 225
	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
L
Linus Torvalds 已提交
226 227 228
bad_area:
	up_read(&mm->mmap_sem);

S
Stuart Menefy 已提交
229
bad_area_nosemaphore:
L
Linus Torvalds 已提交
230
	if (user_mode(regs)) {
231 232 233 234 235
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		info.si_code = si_code;
		info.si_addr = (void *) address;
		force_sig_info(SIGSEGV, &info, tsk);
L
Linus Torvalds 已提交
236 237 238 239 240 241 242 243
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault?  */
	if (fixup_exception(regs))
		return;

M
Magnus Damm 已提交
244 245
	if (handle_trapped_io(regs, address))
		return;
L
Linus Torvalds 已提交
246 247 248 249 250
/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 *
 */
251 252 253 254

	bust_spinlocks(1);

	if (oops_may_print()) {
255
		unsigned long page;
256 257 258 259 260 261 262 263 264 265 266

		if (address < PAGE_SIZE)
			printk(KERN_ALERT "Unable to handle kernel NULL "
					  "pointer dereference");
		else
			printk(KERN_ALERT "Unable to handle kernel paging "
					  "request");
		printk(" at virtual address %08lx\n", address);
		printk(KERN_ALERT "pc = %08lx\n", regs->pc);
		page = (unsigned long)get_TTB();
		if (page) {
267
			page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT];
268 269 270 271 272 273 274 275 276
			printk(KERN_ALERT "*pde = %08lx\n", page);
			if (page & _PAGE_PRESENT) {
				page &= PAGE_MASK;
				address &= 0x003ff000;
				page = ((__typeof__(page) *)
						__va(page))[address >>
							    PAGE_SHIFT];
				printk(KERN_ALERT "*pte = %08lx\n", page);
			}
L
Linus Torvalds 已提交
277 278
		}
	}
279

L
Linus Torvalds 已提交
280
	die("Oops", regs, writeaccess);
281
	bust_spinlocks(0);
L
Linus Torvalds 已提交
282 283 284 285 286 287 288 289
	do_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
290
	if (is_global_init(current)) {
L
Linus Torvalds 已提交
291 292 293 294 295 296
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (user_mode(regs))
297
		do_group_exit(SIGKILL);
L
Linus Torvalds 已提交
298 299 300 301 302 303 304 305 306
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
307 308 309 310 311
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *)address;
	force_sig_info(SIGBUS, &info, tsk);
L
Linus Torvalds 已提交
312 313 314 315 316

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
}
317 318 319 320

/*
 * Called with interrupts disabled.
 */
321 322 323
asmlinkage int __kprobes
handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
	       unsigned long address)
324 325 326 327 328 329
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;
330

331 332 333 334 335 336 337 338
	/*
	 * We don't take page faults for P1, P2, and parts of P4, these
	 * are always mapped, whether it be due to legacy behaviour in
	 * 29-bit mode, or due to PMB configuration in 32-bit mode.
	 */
	if (address >= P3SEG && address < P3_ADDR_MAX) {
		pgd = pgd_offset_k(address);
	} else {
P
Paul Mundt 已提交
339
		if (unlikely(address >= TASK_SIZE || !current->mm))
340
			return 1;
341

P
Paul Mundt 已提交
342
		pgd = pgd_offset(current->mm, address);
343 344 345 346
	}

	pud = pud_offset(pgd, address);
	if (pud_none_or_clear_bad(pud))
347
		return 1;
348 349
	pmd = pmd_offset(pud, address);
	if (pmd_none_or_clear_bad(pmd))
350
		return 1;
P
Paul Mundt 已提交
351
	pte = pte_offset_kernel(pmd, address);
352 353
	entry = *pte;
	if (unlikely(pte_none(entry) || pte_not_present(entry)))
354
		return 1;
355
	if (unlikely(writeaccess && !pte_write(entry)))
356
		return 1;
357 358 359 360 361

	if (writeaccess)
		entry = pte_mkdirty(entry);
	entry = pte_mkyoung(entry);

362 363
	set_pte(pte, entry);

364 365
#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
	/*
366 367 368
	 * SH-4 does not set MMUCR.RC to the corresponding TLB entry in
	 * the case of an initial page write exception, so we need to
	 * flush it in order to avoid potential TLB entry duplication.
369
	 */
370 371
	if (writeaccess == 2)
		local_flush_tlb_one(get_asid(), address & PAGE_MASK);
372 373
#endif

374
	update_mmu_cache(NULL, address, entry);
P
Paul Mundt 已提交
375

376
	return 0;
377
}