提交 759496ba 编写于 作者: J Johannes Weiner 提交者: Linus Torvalds

arch: mm: pass userspace fault flag to generic fault handler

Unlike global OOM handling, memory cgroup code will invoke the OOM killer
in any OOM situation because it has no way of telling faults occuring in
kernel context - which could be handled more gracefully - from
user-triggered faults.

Pass a flag that identifies faults originating in user space from the
architecture-specific fault handlers to generic code so that memcg OOM
handling can be improved.
Signed-off-by: NJohannes Weiner <hannes@cmpxchg.org>
Reviewed-by: NMichal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
上级 87134102
......@@ -89,8 +89,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
const struct exception_table_entry *fixup;
int fault, si_code = SEGV_MAPERR;
siginfo_t info;
unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(cause > 0 ? FAULT_FLAG_WRITE : 0));
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
/* As of EV6, a load into $31/$f31 is a prefetch, and never faults
(or is suppressed by the PALcode). Support that for older CPUs
......@@ -115,7 +114,8 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
if (address >= TASK_SIZE)
goto vmalloc_fault;
#endif
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
......@@ -142,6 +142,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
} else {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
}
/* If for any reason at all we couldn't handle the fault,
......
......@@ -60,8 +60,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
siginfo_t info;
int fault, ret;
int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
/*
* We fault-in kernel-space virtual memory on-demand. The
......@@ -89,6 +88,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
if (in_atomic() || !mm)
goto no_context;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
......@@ -117,6 +118,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
if (write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
......
......@@ -261,9 +261,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
struct task_struct *tsk;
struct mm_struct *mm;
int fault, sig, code;
int write = fsr & FSR_WRITE;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (notify_page_fault(regs, fsr))
return 0;
......@@ -282,6 +280,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (in_atomic() || !mm)
goto no_context;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
if (fsr & FSR_WRITE)
flags |= FAULT_FLAG_WRITE;
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
......
......@@ -199,13 +199,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (esr & ESR_LNX_EXEC) {
vm_flags = VM_EXEC;
} else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}
tsk = current;
mm = tsk->mm;
......@@ -220,6 +213,16 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
if (in_atomic() || !mm)
goto no_context;
if (user_mode(regs))
mm_flags |= FAULT_FLAG_USER;
if (esr & ESR_LNX_EXEC) {
vm_flags = VM_EXEC;
} else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
......
......@@ -86,6 +86,8 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
local_irq_enable();
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
......
......@@ -58,8 +58,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
struct vm_area_struct * vma;
siginfo_t info;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
((writeaccess & 1) ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
D(printk(KERN_DEBUG
"Page fault for %lX on %X at %lX, prot %d write %d\n",
......@@ -117,6 +116,8 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
if (in_atomic() || !mm)
goto no_context;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
......@@ -155,6 +156,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
} else if (writeaccess == 1) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
......
......@@ -34,11 +34,11 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
struct vm_area_struct *vma;
struct mm_struct *mm;
unsigned long _pme, lrai, lrad, fixup;
unsigned long flags = 0;
siginfo_t info;
pgd_t *pge;
pud_t *pue;
pte_t *pte;
int write;
int fault;
#if 0
......@@ -81,6 +81,9 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
if (in_atomic() || !mm)
goto no_context;
if (user_mode(__frame))
flags |= FAULT_FLAG_USER;
down_read(&mm->mmap_sem);
vma = find_vma(mm, ear0);
......@@ -129,7 +132,6 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
*/
good_area:
info.si_code = SEGV_ACCERR;
write = 0;
switch (esr0 & ESR0_ATXC) {
default:
/* handle write to write protected page */
......@@ -140,7 +142,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
#endif
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
write = 1;
flags |= FAULT_FLAG_WRITE;
break;
/* handle read from protected page */
......@@ -162,7 +164,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0);
fault = handle_mm_fault(mm, vma, ear0, flags);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
......
......@@ -53,8 +53,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
int si_code = SEGV_MAPERR;
int fault;
const struct exception_table_entry *fixup;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(cause > 0 ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
/*
* If we're in an interrupt or have no user context,
......@@ -65,6 +64,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
local_irq_enable();
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
......@@ -96,6 +97,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
case FLT_STORE:
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
break;
}
......
......@@ -90,8 +90,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));
flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0);
/* mmap_sem is performance critical.... */
prefetchw(&mm->mmap_sem);
......@@ -119,6 +117,10 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
if (notify_page_fault(regs, TRAP_BRKPT))
return;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
if (mask & VM_WRITE)
flags |= FAULT_FLAG_WRITE;
retry:
down_read(&mm->mmap_sem);
......
......@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
struct mm_struct *mm;
struct vm_area_struct * vma;
unsigned long page, addr;
int write;
unsigned long flags = 0;
int fault;
siginfo_t info;
......@@ -117,6 +117,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
if (in_atomic() || !mm)
goto bad_area_nosemaphore;
if (error_code & ACE_USERMODE)
flags |= FAULT_FLAG_USER;
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
......@@ -166,14 +169,13 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
*/
good_area:
info.si_code = SEGV_ACCERR;
write = 0;
switch (error_code & (ACE_WRITE|ACE_PROTECTION)) {
default: /* 3: write, present */
/* fall through */
case ACE_WRITE: /* write, not present */
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
write++;
flags |= FAULT_FLAG_WRITE;
break;
case ACE_PROTECTION: /* read, present */
case 0: /* read, not present */
......@@ -194,7 +196,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
*/
addr = (address & PAGE_MASK);
set_thread_fault_code(error_code);
fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0);
fault = handle_mm_fault(mm, vma, addr, flags);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
......
......@@ -88,6 +88,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
if (in_atomic() || !mm)
goto no_context;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
......
......@@ -53,8 +53,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
struct vm_area_struct *vma, *prev_vma;
siginfo_t info;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write_access ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
tsk = current;
......@@ -109,6 +108,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
if (in_atomic() || !mm)
goto no_context;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
......@@ -121,6 +122,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
if (write_access) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
goto bad_area;
......
......@@ -92,8 +92,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
int code = SEGV_MAPERR;
int is_write = error_code & ESR_S;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(is_write ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
regs->ear = address;
regs->esr = error_code;
......@@ -121,6 +120,9 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
die("Weird page fault", regs, SIGSEGV);
}
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
......@@ -199,6 +201,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
if (unlikely(is_write)) {
if (unlikely(!(vma->vm_flags & VM_WRITE)))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
/* a read */
} else {
/* protection fault */
......
......@@ -42,8 +42,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
const int field = sizeof(unsigned long) * 2;
siginfo_t info;
int fault;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
#if 0
printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
......@@ -93,6 +92,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
if (in_atomic() || !mm)
goto bad_area_nosemaphore;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
......@@ -114,6 +115,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
if (write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
} else {
if (cpu_has_rixi) {
if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
......
......@@ -171,6 +171,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code,
if (in_atomic() || !mm)
goto no_context;
if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
......
......@@ -86,6 +86,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
if (user_mode(regs)) {
/* Exception was in userspace: reenable interrupts */
local_irq_enable();
flags |= FAULT_FLAG_USER;
} else {
/* If exception was in a syscall, then IRQ's may have
* been enabled or disabled. If they were enabled,
......
......@@ -180,6 +180,10 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
if (in_atomic() || !mm)
goto no_context;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
if (acc_type & VM_WRITE)
flags |= FAULT_FLAG_WRITE;
retry:
down_read(&mm->mmap_sem);
vma = find_vma_prev(mm, address, &prev_vma);
......@@ -203,8 +207,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
* fault.
*/
fault = handle_mm_fault(mm, vma, address,
flags | ((acc_type & VM_WRITE) ? FAULT_FLAG_WRITE : 0));
fault = handle_mm_fault(mm, vma, address, flags);
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
return;
......
......@@ -223,9 +223,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
is_write = error_code & ESR_DST;
#endif /* CONFIG_4xx || CONFIG_BOOKE */
if (is_write)
flags |= FAULT_FLAG_WRITE;
#ifdef CONFIG_PPC_ICSWX
/*
* we need to do this early because this "data storage
......@@ -288,6 +285,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
if (user_mode(regs))
store_update_sp = store_updates_sp(regs);
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
......@@ -415,6 +415,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
} else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
/* a read */
} else {
/* protection fault */
......
......@@ -302,6 +302,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
address = trans_exc_code & __FAIL_ADDR_MASK;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
flags |= FAULT_FLAG_WRITE;
down_read(&mm->mmap_sem);
......
......@@ -47,6 +47,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
const int field = sizeof(unsigned long) * 2;
unsigned long flags = 0;
siginfo_t info;
int fault;
......@@ -75,6 +76,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
if (in_atomic() || !mm)
goto bad_area_nosemaphore;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
if (!vma)
......@@ -95,6 +99,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
if (write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
goto bad_area;
......@@ -105,7 +110,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
fault = handle_mm_fault(mm, vma, address, write);
fault = handle_mm_fault(mm, vma, address, flags);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
......
......@@ -400,9 +400,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
struct mm_struct *mm;
struct vm_area_struct * vma;
int fault;
int write = error_code & FAULT_CODE_WRITE;
unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write ? FAULT_FLAG_WRITE : 0));
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
tsk = current;
mm = tsk->mm;
......@@ -476,6 +474,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
set_thread_fault_code(error_code);
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
if (error_code & FAULT_CODE_WRITE)
flags |= FAULT_FLAG_WRITE;
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
......
......@@ -177,8 +177,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
unsigned long g2;
int from_user = !(regs->psr & PSR_PS);
int fault, code;
unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write ? FAULT_FLAG_WRITE : 0));
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (text_fault)
address = regs->pc;
......@@ -235,6 +234,11 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
goto bad_area;
}
if (from_user)
flags |= FAULT_FLAG_USER;
if (write)
flags |= FAULT_FLAG_WRITE;
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
......@@ -383,6 +387,7 @@ static void force_user_fault(unsigned long address, int write)
struct vm_area_struct *vma;
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
unsigned int flags = FAULT_FLAG_USER;
int code;
code = SEGV_MAPERR;
......@@ -402,11 +407,12 @@ static void force_user_fault(unsigned long address, int write)
if (write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
} else {
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
}
switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) {
switch (handle_mm_fault(mm, vma, address, flags)) {
case VM_FAULT_SIGBUS:
case VM_FAULT_OOM:
goto do_sigbus;
......
......@@ -315,7 +315,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
bad_kernel_pc(regs, address);
return;
}
}
} else
flags |= FAULT_FLAG_USER;
/*
* If we're in an interrupt or have no user
......@@ -418,13 +419,14 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
vma->vm_file != NULL)
set_thread_fault_code(fault_code |
FAULT_CODE_BLKCOMMIT);
flags |= FAULT_FLAG_WRITE;
} else {
/* Allow reads even for write-only mappings */
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
}
flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0);
fault = handle_mm_fault(mm, vma, address, flags);
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
......
......@@ -280,8 +280,7 @@ static int handle_page_fault(struct pt_regs *regs,
if (!is_page_fault)
write = 1;
flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write ? FAULT_FLAG_WRITE : 0));
flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
is_kernel_mode = !user_mode(regs);
......@@ -365,6 +364,9 @@ static int handle_page_fault(struct pt_regs *regs,
goto bad_area_nosemaphore;
}
if (!is_kernel_mode)
flags |= FAULT_FLAG_USER;
/*
* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
......@@ -425,6 +427,7 @@ static int handle_page_fault(struct pt_regs *regs,
#endif
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
flags |= FAULT_FLAG_WRITE;
} else {
if (!is_page_fault || !(vma->vm_flags & VM_READ))
goto bad_area;
......
......@@ -30,8 +30,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
pmd_t *pmd;
pte_t *pte;
int err = -EFAULT;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(is_write ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
*code_out = SEGV_MAPERR;
......@@ -42,6 +41,8 @@ int handle_page_fault(unsigned long address, unsigned long ip,
if (in_atomic())
goto out_nosemaphore;
if (is_user)
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
......@@ -58,12 +59,15 @@ int handle_page_fault(unsigned long address, unsigned long ip,
good_area:
*code_out = SEGV_ACCERR;
if (is_write && !(vma->vm_flags & VM_WRITE))
goto out;
/* Don't require VM_READ|VM_EXEC for write faults! */
if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
goto out;
if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
goto out;
flags |= FAULT_FLAG_WRITE;
} else {
/* Don't require VM_READ|VM_EXEC for write faults! */
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto out;
}
do {
int fault;
......
......@@ -209,8 +209,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
struct task_struct *tsk;
struct mm_struct *mm;
int fault, sig, code;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
((!(fsr ^ 0x12)) ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
tsk = current;
mm = tsk->mm;
......@@ -222,6 +221,11 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (in_atomic() || !mm)
goto no_context;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
if (!(fsr ^ 0x12))
flags |= FAULT_FLAG_WRITE;
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
......
......@@ -1011,9 +1011,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
unsigned long address;
struct mm_struct *mm;
int fault;
int write = error_code & PF_WRITE;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write ? FAULT_FLAG_WRITE : 0);
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
tsk = current;
mm = tsk->mm;
......@@ -1083,6 +1081,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (user_mode_vm(regs)) {
local_irq_enable();
error_code |= PF_USER;
flags |= FAULT_FLAG_USER;
} else {
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
......@@ -1109,6 +1108,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
return;
}
if (error_code & PF_WRITE)
flags |= FAULT_FLAG_WRITE;
/*
* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in
......
......@@ -72,6 +72,8 @@ void do_page_fault(struct pt_regs *regs)
address, exccause, regs->pc, is_write? "w":"", is_exec? "x":"");
#endif
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
......
......@@ -176,6 +176,7 @@ extern pgprot_t protection_map[16];
#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
#define FAULT_FLAG_TRIED 0x40 /* second try */
#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */
/*
* vm_fault is filled by the the pagefault handler and passed to the vma's
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册