提交 b81a618d 编写于 作者: L Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
  deal with races in /proc/*/{syscall,stack,personality}
  proc: enable writing to /proc/pid/mem
  proc: make check_mem_permission() return an mm_struct on success
  proc: hold cred_guard_mutex in check_mem_permission()
  proc: disable mem_write after exec
  mm: implement access_remote_vm
  mm: factor out main logic of access_process_vm
  mm: use mm_struct to resolve gate vma's in __get_user_pages
  mm: arch: rename in_gate_area_no_task to in_gate_area_no_mm
  mm: arch: make in_gate_area take an mm_struct instead of a task_struct
  mm: arch: make get_gate_vma take an mm_struct instead of a task_struct
  x86: mark associated mm when running a task in 32 bit compatibility mode
  x86: add context tag to mark mm when running a task in 32-bit compatibility mode
  auxv: require the target to be tracable (or yourself)
  close race in /proc/*/environ
  report errors in /proc/*/*map* sanely
  pagemap: close races with suid execve
  make sessionid permissions in /proc/*/task/* match those in /proc/*
  fix leaks in path_lookupat()

Fix up trivial conflicts in fs/proc/base.c
......@@ -820,17 +820,17 @@ static int __init vdso_init(void)
}
arch_initcall(vdso_init);
int in_gate_area_no_task(unsigned long addr)
int in_gate_area_no_mm(unsigned long addr)
{
return 0;
}
int in_gate_area(struct task_struct *task, unsigned long addr)
int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
return 0;
}
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
return NULL;
}
......
......@@ -337,17 +337,17 @@ static int __init vdso_init(void)
}
arch_initcall(vdso_init);
int in_gate_area_no_task(unsigned long addr)
int in_gate_area_no_mm(unsigned long addr)
{
return 0;
}
int in_gate_area(struct task_struct *task, unsigned long addr)
int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
return 0;
}
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
return NULL;
}
......@@ -94,17 +94,17 @@ const char *arch_vma_name(struct vm_area_struct *vma)
return NULL;
}
struct vm_area_struct *get_gate_vma(struct task_struct *task)
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
return NULL;
}
int in_gate_area(struct task_struct *task, unsigned long address)
int in_gate_area(struct mm_struct *mm, unsigned long address)
{
return 0;
}
int in_gate_area_no_task(unsigned long address)
int in_gate_area_no_mm(unsigned long address)
{
return 0;
}
......@@ -298,6 +298,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
/* OK, This is the point of no return */
set_personality(PER_LINUX);
set_thread_flag(TIF_IA32);
current->mm->context.ia32_compat = 1;
setup_new_exec(bprm);
......
......@@ -13,6 +13,12 @@ typedef struct {
int size;
struct mutex lock;
void *vdso;
#ifdef CONFIG_X86_64
/* True if mm supports a task running in 32 bit compatibility mode. */
unsigned short ia32_compat;
#endif
} mm_context_t;
#ifdef CONFIG_SMP
......
......@@ -501,6 +501,10 @@ void set_personality_64bit(void)
/* Make sure to be in 64bit mode */
clear_thread_flag(TIF_IA32);
/* Ensure the corresponding mm is not marked. */
if (current->mm)
current->mm->context.ia32_compat = 0;
/* TBD: overwrites user setup. Should have two bits.
But 64bit processes have always behaved this way,
so it's not too bad. The main problem is just that
......@@ -516,6 +520,10 @@ void set_personality_ia32(void)
set_thread_flag(TIF_IA32);
current->personality |= force_personality32;
/* Mark the associated mm as containing 32-bit tasks. */
if (current->mm)
current->mm->context.ia32_compat = 1;
/* Prepare the first "return" to user space */
current_thread_info()->status |= TS_COMPAT;
}
......
......@@ -862,18 +862,18 @@ static struct vm_area_struct gate_vma = {
.vm_flags = VM_READ | VM_EXEC
};
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(tsk, TIF_IA32))
if (!mm || mm->context.ia32_compat)
return NULL;
#endif
return &gate_vma;
}
int in_gate_area(struct task_struct *task, unsigned long addr)
int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
struct vm_area_struct *vma = get_gate_vma(task);
struct vm_area_struct *vma = get_gate_vma(mm);
if (!vma)
return 0;
......@@ -882,11 +882,11 @@ int in_gate_area(struct task_struct *task, unsigned long addr)
}
/*
* Use this when you have no reliable task/vma, typically from interrupt
* context. It is less reliable than using the task's vma and may give
* false positives:
* Use this when you have no reliable mm, typically from interrupt
* context. It is less reliable than using a task's mm and may give
* false positives.
*/
int in_gate_area_no_task(unsigned long addr)
int in_gate_area_no_mm(unsigned long addr)
{
return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}
......
......@@ -417,24 +417,25 @@ const char *arch_vma_name(struct vm_area_struct *vma)
return NULL;
}
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
struct mm_struct *mm = tsk->mm;
/* Check to see if this task was created in compat vdso mode */
/*
* Check to see if the corresponding task was created in compat vdso
* mode.
*/
if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
return &gate_vma;
return NULL;
}
int in_gate_area(struct task_struct *task, unsigned long addr)
int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
const struct vm_area_struct *vma = get_gate_vma(task);
const struct vm_area_struct *vma = get_gate_vma(mm);
return vma && addr >= vma->vm_start && addr < vma->vm_end;
}
int in_gate_area_no_task(unsigned long addr)
int in_gate_area_no_mm(unsigned long addr)
{
return 0;
}
......
......@@ -1906,7 +1906,7 @@ static int elf_core_dump(struct coredump_params *cprm)
segs = current->mm->map_count;
segs += elf_core_extra_phdrs();
gate_vma = get_gate_vma(current);
gate_vma = get_gate_vma(current->mm);
if (gate_vma != NULL)
segs++;
......
......@@ -1650,13 +1650,16 @@ static int path_lookupat(int dfd, const char *name,
err = -ECHILD;
}
if (!err)
if (!err) {
err = handle_reval_path(nd);
if (err)
path_put(&nd->path);
}
if (!err && nd->flags & LOOKUP_DIRECTORY) {
if (!nd->inode->i_op->lookup) {
path_put(&nd->path);
return -ENOTDIR;
err = -ENOTDIR;
}
}
......
......@@ -191,17 +191,20 @@ static int proc_root_link(struct inode *inode, struct path *path)
return result;
}
/*
* Return zero if current may access user memory in @task, -error if not.
*/
static int check_mem_permission(struct task_struct *task)
static struct mm_struct *__check_mem_permission(struct task_struct *task)
{
struct mm_struct *mm;
mm = get_task_mm(task);
if (!mm)
return ERR_PTR(-EINVAL);
/*
* A task can always look at itself, in case it chooses
* to use system calls instead of load instructions.
*/
if (task == current)
return 0;
return mm;
/*
* If current is actively ptrace'ing, and would also be
......@@ -213,27 +216,53 @@ static int check_mem_permission(struct task_struct *task)
match = (tracehook_tracer_task(task) == current);
rcu_read_unlock();
if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
return 0;
return mm;
}
/*
* Noone else is allowed.
*/
return -EPERM;
mmput(mm);
return ERR_PTR(-EPERM);
}
/*
* If current may access user memory in @task return a reference to the
* corresponding mm, otherwise ERR_PTR.
*/
static struct mm_struct *check_mem_permission(struct task_struct *task)
{
struct mm_struct *mm;
int err;
/*
* Avoid racing if task exec's as we might get a new mm but validate
* against old credentials.
*/
err = mutex_lock_killable(&task->signal->cred_guard_mutex);
if (err)
return ERR_PTR(err);
mm = __check_mem_permission(task);
mutex_unlock(&task->signal->cred_guard_mutex);
return mm;
}
struct mm_struct *mm_for_maps(struct task_struct *task)
{
struct mm_struct *mm;
int err;
if (mutex_lock_killable(&task->signal->cred_guard_mutex))
return NULL;
err = mutex_lock_killable(&task->signal->cred_guard_mutex);
if (err)
return ERR_PTR(err);
mm = get_task_mm(task);
if (mm && mm != current->mm &&
!ptrace_may_access(task, PTRACE_MODE_READ)) {
mmput(mm);
mm = NULL;
mm = ERR_PTR(-EACCES);
}
mutex_unlock(&task->signal->cred_guard_mutex);
......@@ -279,9 +308,9 @@ static int proc_pid_cmdline(struct task_struct *task, char * buffer)
static int proc_pid_auxv(struct task_struct *task, char *buffer)
{
int res = 0;
struct mm_struct *mm = get_task_mm(task);
if (mm) {
struct mm_struct *mm = mm_for_maps(task);
int res = PTR_ERR(mm);
if (mm && !IS_ERR(mm)) {
unsigned int nwords = 0;
do {
nwords += 2;
......@@ -318,6 +347,23 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
}
#endif /* CONFIG_KALLSYMS */
static int lock_trace(struct task_struct *task)
{
int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
if (err)
return err;
if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
mutex_unlock(&task->signal->cred_guard_mutex);
return -EPERM;
}
return 0;
}
static void unlock_trace(struct task_struct *task)
{
mutex_unlock(&task->signal->cred_guard_mutex);
}
#ifdef CONFIG_STACKTRACE
#define MAX_STACK_TRACE_DEPTH 64
......@@ -327,6 +373,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
{
struct stack_trace trace;
unsigned long *entries;
int err;
int i;
entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
......@@ -337,15 +384,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
trace.max_entries = MAX_STACK_TRACE_DEPTH;
trace.entries = entries;
trace.skip = 0;
save_stack_trace_tsk(task, &trace);
for (i = 0; i < trace.nr_entries; i++) {
seq_printf(m, "[<%pK>] %pS\n",
(void *)entries[i], (void *)entries[i]);
err = lock_trace(task);
if (!err) {
save_stack_trace_tsk(task, &trace);
for (i = 0; i < trace.nr_entries; i++) {
seq_printf(m, "[<%pK>] %pS\n",
(void *)entries[i], (void *)entries[i]);
}
unlock_trace(task);
}
kfree(entries);
return 0;
return err;
}
#endif
......@@ -508,18 +560,22 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
{
long nr;
unsigned long args[6], sp, pc;
int res = lock_trace(task);
if (res)
return res;
if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
return sprintf(buffer, "running\n");
if (nr < 0)
return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
return sprintf(buffer,
res = sprintf(buffer, "running\n");
else if (nr < 0)
res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
else
res = sprintf(buffer,
"%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
nr,
args[0], args[1], args[2], args[3], args[4], args[5],
sp, pc);
unlock_trace(task);
return res;
}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
......@@ -775,19 +831,14 @@ static ssize_t mem_read(struct file * file, char __user * buf,
if (!task)
goto out_no_task;
ret = check_mem_permission(task);
if (ret)
goto out;
ret = -ENOMEM;
page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
goto out;
ret = 0;
mm = get_task_mm(task);
if (!mm)
mm = check_mem_permission(task);
ret = PTR_ERR(mm);
if (IS_ERR(mm))
goto out_free;
ret = -EIO;
......@@ -801,8 +852,8 @@ static ssize_t mem_read(struct file * file, char __user * buf,
int this_len, retval;
this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
retval = access_process_vm(task, src, page, this_len, 0);
if (!retval || check_mem_permission(task)) {
retval = access_remote_vm(mm, src, page, this_len, 0);
if (!retval) {
if (!ret)
ret = -EIO;
break;
......@@ -830,10 +881,6 @@ static ssize_t mem_read(struct file * file, char __user * buf,
return ret;
}
#define mem_write NULL
#ifndef mem_write
/* This is a security hazard */
static ssize_t mem_write(struct file * file, const char __user *buf,
size_t count, loff_t *ppos)
{
......@@ -841,19 +888,25 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
char *page;
struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
unsigned long dst = *ppos;
struct mm_struct *mm;
copied = -ESRCH;
if (!task)
goto out_no_task;
copied = check_mem_permission(task);
if (copied)
goto out;
mm = check_mem_permission(task);
copied = PTR_ERR(mm);
if (IS_ERR(mm))
goto out_task;
copied = -EIO;
if (file->private_data != (void *)((long)current->self_exec_id))
goto out_mm;
copied = -ENOMEM;
page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
goto out;
goto out_mm;
copied = 0;
while (count > 0) {
......@@ -864,7 +917,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
copied = -EFAULT;
break;
}
retval = access_process_vm(task, dst, page, this_len, 1);
retval = access_remote_vm(mm, dst, page, this_len, 1);
if (!retval) {
if (!copied)
copied = -EIO;
......@@ -877,12 +930,13 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
}
*ppos = dst;
free_page((unsigned long) page);
out:
out_mm:
mmput(mm);
out_task:
put_task_struct(task);
out_no_task:
return copied;
}
#endif
loff_t mem_lseek(struct file *file, loff_t offset, int orig)
{
......@@ -919,21 +973,18 @@ static ssize_t environ_read(struct file *file, char __user *buf,
if (!task)
goto out_no_task;
ret = -EPERM;
if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out;
ret = -ENOMEM;
page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
goto out;
ret = 0;
mm = get_task_mm(task);
if (!mm)
mm = mm_for_maps(task);
ret = PTR_ERR(mm);
if (!mm || IS_ERR(mm))
goto out_free;
ret = 0;
while (count > 0) {
int this_len, retval, max_len;
......@@ -2751,8 +2802,12 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
seq_printf(m, "%08x\n", task->personality);
return 0;
int err = lock_trace(task);
if (!err) {
seq_printf(m, "%08x\n", task->personality);
unlock_trace(task);
}
return err;
}
/*
......@@ -2771,7 +2826,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("environ", S_IRUSR, proc_environ_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
ONE("personality", S_IRUGO, proc_pid_personality),
INF("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
......@@ -2781,7 +2836,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
INF("syscall", S_IRUSR, proc_pid_syscall),
INF("syscall", S_IRUGO, proc_pid_syscall),
#endif
INF("cmdline", S_IRUGO, proc_pid_cmdline),
ONE("stat", S_IRUGO, proc_tgid_stat),
......@@ -2800,7 +2855,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_smaps_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
REG("pagemap", S_IRUGO, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
......@@ -2809,7 +2864,7 @@ static const struct pid_entry tgid_base_stuff[] = {
INF("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
ONE("stack", S_IRUSR, proc_pid_stack),
ONE("stack", S_IRUGO, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, proc_pid_schedstat),
......@@ -3111,14 +3166,14 @@ static const struct pid_entry tid_base_stuff[] = {
REG("environ", S_IRUSR, proc_environ_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
ONE("personality", S_IRUGO, proc_pid_personality),
INF("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
INF("syscall", S_IRUSR, proc_pid_syscall),
INF("syscall", S_IRUGO, proc_pid_syscall),
#endif
INF("cmdline", S_IRUGO, proc_pid_cmdline),
ONE("stat", S_IRUGO, proc_tid_stat),
......@@ -3136,7 +3191,7 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_smaps_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
REG("pagemap", S_IRUGO, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
......@@ -3145,7 +3200,7 @@ static const struct pid_entry tid_base_stuff[] = {
INF("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
ONE("stack", S_IRUSR, proc_pid_stack),
ONE("stack", S_IRUGO, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, proc_pid_schedstat),
......@@ -3164,7 +3219,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUSR, proc_sessionid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
......
......@@ -121,14 +121,14 @@ static void *m_start(struct seq_file *m, loff_t *pos)
priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
if (!priv->task)
return NULL;
return ERR_PTR(-ESRCH);
mm = mm_for_maps(priv->task);
if (!mm)
return NULL;
if (!mm || IS_ERR(mm))
return mm;
down_read(&mm->mmap_sem);
tail_vma = get_gate_vma(priv->task);
tail_vma = get_gate_vma(priv->task->mm);
priv->tail_vma = tail_vma;
/* Start with last addr hint */
......@@ -279,7 +279,8 @@ static int show_map(struct seq_file *m, void *v)
show_map_vma(m, vma);
if (m->count < m->size) /* vma is copied successfully */
m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
m->version = (vma != get_gate_vma(task->mm))
? vma->vm_start : 0;
return 0;
}
......@@ -468,7 +469,8 @@ static int show_smap(struct seq_file *m, void *v)
(unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
if (m->count < m->size) /* vma is copied successfully */
m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
m->version = (vma != get_gate_vma(task->mm))
? vma->vm_start : 0;
return 0;
}
......@@ -764,8 +766,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
if (!task)
goto out;
ret = -EACCES;
if (!ptrace_may_access(task, PTRACE_MODE_READ))
mm = mm_for_maps(task);
ret = PTR_ERR(mm);
if (!mm || IS_ERR(mm))
goto out_task;
ret = -EINVAL;
......@@ -778,10 +781,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
if (!count)
goto out_task;
mm = get_task_mm(task);
if (!mm)
goto out_task;
pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
ret = -ENOMEM;
......
......@@ -199,13 +199,13 @@ static void *m_start(struct seq_file *m, loff_t *pos)
/* pin the task and mm whilst we play with them */
priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
if (!priv->task)
return NULL;
return ERR_PTR(-ESRCH);
mm = mm_for_maps(priv->task);
if (!mm) {
if (!mm || IS_ERR(mm)) {
put_task_struct(priv->task);
priv->task = NULL;
return NULL;
return mm;
}
down_read(&mm->mmap_sem);
......
......@@ -982,6 +982,8 @@ static inline int handle_mm_fault(struct mm_struct *mm,
extern int make_pages_present(unsigned long addr, unsigned long end);
extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, int write);
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int len, unsigned int foll_flags,
......@@ -1592,13 +1594,13 @@ static inline bool kernel_page_present(struct page *page) { return true; }
#endif /* CONFIG_HIBERNATION */
#endif
extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
#ifdef __HAVE_ARCH_GATE_AREA
int in_gate_area_no_task(unsigned long addr);
int in_gate_area(struct task_struct *task, unsigned long addr);
int in_gate_area_no_mm(unsigned long addr);
int in_gate_area(struct mm_struct *mm, unsigned long addr);
#else
int in_gate_area_no_task(unsigned long addr);
#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
int in_gate_area_no_mm(unsigned long addr);
#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);})
#endif /* __HAVE_ARCH_GATE_AREA */
int drop_caches_sysctl_handler(struct ctl_table *, int,
......
......@@ -64,14 +64,14 @@ static inline int is_kernel_text(unsigned long addr)
if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) ||
arch_is_kernel_text(addr))
return 1;
return in_gate_area_no_task(addr);
return in_gate_area_no_mm(addr);
}
static inline int is_kernel(unsigned long addr)
{
if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
return 1;
return in_gate_area_no_task(addr);
return in_gate_area_no_mm(addr);
}
static int is_ksym_addr(unsigned long addr)
......
......@@ -1486,9 +1486,9 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
struct vm_area_struct *vma;
vma = find_extend_vma(mm, start);
if (!vma && in_gate_area(tsk, start)) {
if (!vma && in_gate_area(mm, start)) {
unsigned long pg = start & PAGE_MASK;
struct vm_area_struct *gate_vma = get_gate_vma(tsk);
struct vm_area_struct *gate_vma = get_gate_vma(mm);
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
......@@ -1591,10 +1591,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
return i ? i : -EFAULT;
BUG();
}
if (ret & VM_FAULT_MAJOR)
tsk->maj_flt++;
else
tsk->min_flt++;
if (tsk) {
if (ret & VM_FAULT_MAJOR)
tsk->maj_flt++;
else
tsk->min_flt++;
}
if (ret & VM_FAULT_RETRY) {
if (nonblocking)
......@@ -1641,7 +1644,8 @@ EXPORT_SYMBOL(__get_user_pages);
/**
* get_user_pages() - pin user pages in memory
* @tsk: task_struct of target task
* @tsk: the task_struct to use for page fault accounting, or
* NULL if faults are not to be recorded.
* @mm: mm_struct of target mm
* @start: starting user address
* @nr_pages: number of pages from start to pin
......@@ -3499,7 +3503,7 @@ static int __init gate_vma_init(void)
__initcall(gate_vma_init);
#endif
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
#ifdef AT_SYSINFO_EHDR
return &gate_vma;
......@@ -3508,7 +3512,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
#endif
}
int in_gate_area_no_task(unsigned long addr)
int in_gate_area_no_mm(unsigned long addr)
{
#ifdef AT_SYSINFO_EHDR
if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
......@@ -3649,20 +3653,15 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
#endif
/*
* Access another process' address space.
* Source/target buffer must be kernel space,
* Do not walk the page table directly, use get_user_pages
* Access another process' address space as given in mm. If non-NULL, use the
* given task for page fault accounting.
*/
int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, int write)
{
struct mm_struct *mm;
struct vm_area_struct *vma;
void *old_buf = buf;
mm = get_task_mm(tsk);
if (!mm)
return 0;
down_read(&mm->mmap_sem);
/* ignore errors, just check how much was successfully transferred */
while (len) {
......@@ -3711,11 +3710,47 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
addr += bytes;
}
up_read(&mm->mmap_sem);
mmput(mm);
return buf - old_buf;
}
/**
* @access_remote_vm - access another process' address space
* @mm: the mm_struct of the target address space
* @addr: start address to access
* @buf: source or destination buffer
* @len: number of bytes to transfer
* @write: whether the access is a write
*
* The caller must hold a reference on @mm.
*/
int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, int write)
{
return __access_remote_vm(NULL, mm, addr, buf, len, write);
}
/*
* Access another process' address space.
* Source/target buffer must be kernel space,
* Do not walk the page table directly, use get_user_pages
*/
int access_process_vm(struct task_struct *tsk, unsigned long addr,
void *buf, int len, int write)
{
struct mm_struct *mm;
int ret;
mm = get_task_mm(tsk);
if (!mm)
return 0;
ret = __access_remote_vm(tsk, mm, addr, buf, len, write);
mmput(mm);
return ret;
}
/*
* Print the name of a VMA.
*/
......
......@@ -237,7 +237,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
is_vm_hugetlb_page(vma) ||
vma == get_gate_vma(current))) {
vma == get_gate_vma(current->mm))) {
__mlock_vma_pages_range(vma, start, end, NULL);
......@@ -332,7 +332,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
int lock = newflags & VM_LOCKED;
if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
is_vm_hugetlb_page(vma) || vma == get_gate_vma(current))
is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))
goto out; /* don't set VM_LOCKED, don't count */
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
......
......@@ -1963,7 +1963,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
return -ENOMEM;
}
int in_gate_area_no_task(unsigned long addr)
int in_gate_area_no_mm(unsigned long addr)
{
return 0;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册