提交 f7cfd871 编写于 作者: E Eric W. Biederman

exec: Transform exec_update_mutex into a rw_semaphore

Recently syzbot reported[0] that there is a deadlock amongst the users
of exec_update_mutex.  The problematic lock ordering found by lockdep
was:

   perf_event_open  (exec_update_mutex -> ovl_i_mutex)
   chown            (ovl_i_mutex       -> sb_writes)
   sendfile         (sb_writes         -> p->lock)
     by reading from a proc file and writing to overlayfs
   proc_pid_syscall (p->lock           -> exec_update_mutex)

While looking at possible solutions it occured to me that all of the
users and possible users involved only wanted to state of the given
process to remain the same.  They are all readers.  The only writer is
exec.

There is no reason for readers to block on each other.  So fix
this deadlock by transforming exec_update_mutex into a rw_semaphore
named exec_update_lock that only exec takes for writing.

Cc: Jann Horn <jannh@google.com>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Bernd Edlinger <bernd.edlinger@hotmail.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Christopher Yeoh <cyeoh@au1.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Sargun Dhillon <sargun@sargun.me>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Fixes: eea96732 ("exec: Add exec_update_mutex to replace cred_guard_mutex")
[0] https://lkml.kernel.org/r/00000000000063640c05ade8e3de@google.com
Reported-by: syzbot+db9cdf3dd1f64252c6ef@syzkaller.appspotmail.com
Link: https://lkml.kernel.org/r/87ft4mbqen.fsf@x220.int.ebiederm.orgSigned-off-by: NEric W. Biederman <ebiederm@xmission.com>
上级 31784cff
...@@ -965,8 +965,8 @@ EXPORT_SYMBOL(read_code); ...@@ -965,8 +965,8 @@ EXPORT_SYMBOL(read_code);
/* /*
* Maps the mm_struct mm into the current task struct. * Maps the mm_struct mm into the current task struct.
* On success, this function returns with the mutex * On success, this function returns with exec_update_lock
* exec_update_mutex locked. * held for writing.
*/ */
static int exec_mmap(struct mm_struct *mm) static int exec_mmap(struct mm_struct *mm)
{ {
...@@ -981,7 +981,7 @@ static int exec_mmap(struct mm_struct *mm) ...@@ -981,7 +981,7 @@ static int exec_mmap(struct mm_struct *mm)
if (old_mm) if (old_mm)
sync_mm_rss(old_mm); sync_mm_rss(old_mm);
ret = mutex_lock_killable(&tsk->signal->exec_update_mutex); ret = down_write_killable(&tsk->signal->exec_update_lock);
if (ret) if (ret)
return ret; return ret;
...@@ -995,7 +995,7 @@ static int exec_mmap(struct mm_struct *mm) ...@@ -995,7 +995,7 @@ static int exec_mmap(struct mm_struct *mm)
mmap_read_lock(old_mm); mmap_read_lock(old_mm);
if (unlikely(old_mm->core_state)) { if (unlikely(old_mm->core_state)) {
mmap_read_unlock(old_mm); mmap_read_unlock(old_mm);
mutex_unlock(&tsk->signal->exec_update_mutex); up_write(&tsk->signal->exec_update_lock);
return -EINTR; return -EINTR;
} }
} }
...@@ -1382,7 +1382,7 @@ int begin_new_exec(struct linux_binprm * bprm) ...@@ -1382,7 +1382,7 @@ int begin_new_exec(struct linux_binprm * bprm)
return 0; return 0;
out_unlock: out_unlock:
mutex_unlock(&me->signal->exec_update_mutex); up_write(&me->signal->exec_update_lock);
out: out:
return retval; return retval;
} }
...@@ -1423,7 +1423,7 @@ void setup_new_exec(struct linux_binprm * bprm) ...@@ -1423,7 +1423,7 @@ void setup_new_exec(struct linux_binprm * bprm)
* some architectures like powerpc * some architectures like powerpc
*/ */
me->mm->task_size = TASK_SIZE; me->mm->task_size = TASK_SIZE;
mutex_unlock(&me->signal->exec_update_mutex); up_write(&me->signal->exec_update_lock);
mutex_unlock(&me->signal->cred_guard_mutex); mutex_unlock(&me->signal->cred_guard_mutex);
} }
EXPORT_SYMBOL(setup_new_exec); EXPORT_SYMBOL(setup_new_exec);
......
...@@ -405,11 +405,11 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, ...@@ -405,11 +405,11 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
static int lock_trace(struct task_struct *task) static int lock_trace(struct task_struct *task)
{ {
int err = mutex_lock_killable(&task->signal->exec_update_mutex); int err = down_read_killable(&task->signal->exec_update_lock);
if (err) if (err)
return err; return err;
if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
mutex_unlock(&task->signal->exec_update_mutex); up_read(&task->signal->exec_update_lock);
return -EPERM; return -EPERM;
} }
return 0; return 0;
...@@ -417,7 +417,7 @@ static int lock_trace(struct task_struct *task) ...@@ -417,7 +417,7 @@ static int lock_trace(struct task_struct *task)
static void unlock_trace(struct task_struct *task) static void unlock_trace(struct task_struct *task)
{ {
mutex_unlock(&task->signal->exec_update_mutex); up_read(&task->signal->exec_update_lock);
} }
#ifdef CONFIG_STACKTRACE #ifdef CONFIG_STACKTRACE
...@@ -2930,7 +2930,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh ...@@ -2930,7 +2930,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
unsigned long flags; unsigned long flags;
int result; int result;
result = mutex_lock_killable(&task->signal->exec_update_mutex); result = down_read_killable(&task->signal->exec_update_lock);
if (result) if (result)
return result; return result;
...@@ -2966,7 +2966,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh ...@@ -2966,7 +2966,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
result = 0; result = 0;
out_unlock: out_unlock:
mutex_unlock(&task->signal->exec_update_mutex); up_read(&task->signal->exec_update_lock);
return result; return result;
} }
......
...@@ -228,12 +228,13 @@ struct signal_struct { ...@@ -228,12 +228,13 @@ struct signal_struct {
* credential calculations * credential calculations
* (notably. ptrace) * (notably. ptrace)
* Deprecated do not use in new code. * Deprecated do not use in new code.
* Use exec_update_mutex instead. * Use exec_update_lock instead.
*/
struct mutex exec_update_mutex; /* Held while task_struct is being
* updated during exec, and may have
* inconsistent permissions.
*/ */
struct rw_semaphore exec_update_lock; /* Held while task_struct is
* being updated during exec,
* and may have inconsistent
* permissions.
*/
} __randomize_layout; } __randomize_layout;
/* /*
......
...@@ -26,7 +26,7 @@ static struct signal_struct init_signals = { ...@@ -26,7 +26,7 @@ static struct signal_struct init_signals = {
.multiprocess = HLIST_HEAD_INIT, .multiprocess = HLIST_HEAD_INIT,
.rlim = INIT_RLIMITS, .rlim = INIT_RLIMITS,
.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
.exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex), .exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock),
#ifdef CONFIG_POSIX_TIMERS #ifdef CONFIG_POSIX_TIMERS
.posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
.cputimer = { .cputimer = {
......
...@@ -1325,7 +1325,7 @@ static void put_ctx(struct perf_event_context *ctx) ...@@ -1325,7 +1325,7 @@ static void put_ctx(struct perf_event_context *ctx)
* function. * function.
* *
* Lock order: * Lock order:
* exec_update_mutex * exec_update_lock
* task_struct::perf_event_mutex * task_struct::perf_event_mutex
* perf_event_context::mutex * perf_event_context::mutex
* perf_event::child_mutex; * perf_event::child_mutex;
...@@ -11721,14 +11721,14 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -11721,14 +11721,14 @@ SYSCALL_DEFINE5(perf_event_open,
} }
if (task) { if (task) {
err = mutex_lock_interruptible(&task->signal->exec_update_mutex); err = down_read_interruptible(&task->signal->exec_update_lock);
if (err) if (err)
goto err_task; goto err_task;
/* /*
* Preserve ptrace permission check for backwards compatibility. * Preserve ptrace permission check for backwards compatibility.
* *
* We must hold exec_update_mutex across this and any potential * We must hold exec_update_lock across this and any potential
* perf_install_in_context() call for this new event to * perf_install_in_context() call for this new event to
* serialize against exec() altering our credentials (and the * serialize against exec() altering our credentials (and the
* perf_event_exit_task() that could imply). * perf_event_exit_task() that could imply).
...@@ -12017,7 +12017,7 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -12017,7 +12017,7 @@ SYSCALL_DEFINE5(perf_event_open,
mutex_unlock(&ctx->mutex); mutex_unlock(&ctx->mutex);
if (task) { if (task) {
mutex_unlock(&task->signal->exec_update_mutex); up_read(&task->signal->exec_update_lock);
put_task_struct(task); put_task_struct(task);
} }
...@@ -12053,7 +12053,7 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -12053,7 +12053,7 @@ SYSCALL_DEFINE5(perf_event_open,
free_event(event); free_event(event);
err_cred: err_cred:
if (task) if (task)
mutex_unlock(&task->signal->exec_update_mutex); up_read(&task->signal->exec_update_lock);
err_task: err_task:
if (task) if (task)
put_task_struct(task); put_task_struct(task);
...@@ -12358,7 +12358,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) ...@@ -12358,7 +12358,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
/* /*
* When a child task exits, feed back event values to parent events. * When a child task exits, feed back event values to parent events.
* *
* Can be called with exec_update_mutex held when called from * Can be called with exec_update_lock held when called from
* setup_new_exec(). * setup_new_exec().
*/ */
void perf_event_exit_task(struct task_struct *child) void perf_event_exit_task(struct task_struct *child)
......
...@@ -1221,7 +1221,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) ...@@ -1221,7 +1221,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
struct mm_struct *mm; struct mm_struct *mm;
int err; int err;
err = mutex_lock_killable(&task->signal->exec_update_mutex); err = down_read_killable(&task->signal->exec_update_lock);
if (err) if (err)
return ERR_PTR(err); return ERR_PTR(err);
...@@ -1231,7 +1231,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) ...@@ -1231,7 +1231,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
mmput(mm); mmput(mm);
mm = ERR_PTR(-EACCES); mm = ERR_PTR(-EACCES);
} }
mutex_unlock(&task->signal->exec_update_mutex); up_read(&task->signal->exec_update_lock);
return mm; return mm;
} }
...@@ -1591,7 +1591,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) ...@@ -1591,7 +1591,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->oom_score_adj_min = current->signal->oom_score_adj_min; sig->oom_score_adj_min = current->signal->oom_score_adj_min;
mutex_init(&sig->cred_guard_mutex); mutex_init(&sig->cred_guard_mutex);
mutex_init(&sig->exec_update_mutex); init_rwsem(&sig->exec_update_lock);
return 0; return 0;
} }
......
...@@ -75,25 +75,25 @@ get_file_raw_ptr(struct task_struct *task, unsigned int idx) ...@@ -75,25 +75,25 @@ get_file_raw_ptr(struct task_struct *task, unsigned int idx)
return file; return file;
} }
static void kcmp_unlock(struct mutex *m1, struct mutex *m2) static void kcmp_unlock(struct rw_semaphore *l1, struct rw_semaphore *l2)
{ {
if (likely(m2 != m1)) if (likely(l2 != l1))
mutex_unlock(m2); up_read(l2);
mutex_unlock(m1); up_read(l1);
} }
static int kcmp_lock(struct mutex *m1, struct mutex *m2) static int kcmp_lock(struct rw_semaphore *l1, struct rw_semaphore *l2)
{ {
int err; int err;
if (m2 > m1) if (l2 > l1)
swap(m1, m2); swap(l1, l2);
err = mutex_lock_killable(m1); err = down_read_killable(l1);
if (!err && likely(m1 != m2)) { if (!err && likely(l1 != l2)) {
err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING); err = down_read_killable_nested(l2, SINGLE_DEPTH_NESTING);
if (err) if (err)
mutex_unlock(m1); up_read(l1);
} }
return err; return err;
...@@ -173,8 +173,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type, ...@@ -173,8 +173,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
/* /*
* One should have enough rights to inspect task details. * One should have enough rights to inspect task details.
*/ */
ret = kcmp_lock(&task1->signal->exec_update_mutex, ret = kcmp_lock(&task1->signal->exec_update_lock,
&task2->signal->exec_update_mutex); &task2->signal->exec_update_lock);
if (ret) if (ret)
goto err; goto err;
if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) || if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) ||
...@@ -229,8 +229,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type, ...@@ -229,8 +229,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
} }
err_unlock: err_unlock:
kcmp_unlock(&task1->signal->exec_update_mutex, kcmp_unlock(&task1->signal->exec_update_lock,
&task2->signal->exec_update_mutex); &task2->signal->exec_update_lock);
err: err:
put_task_struct(task1); put_task_struct(task1);
put_task_struct(task2); put_task_struct(task2);
......
...@@ -628,7 +628,7 @@ static struct file *__pidfd_fget(struct task_struct *task, int fd) ...@@ -628,7 +628,7 @@ static struct file *__pidfd_fget(struct task_struct *task, int fd)
struct file *file; struct file *file;
int ret; int ret;
ret = mutex_lock_killable(&task->signal->exec_update_mutex); ret = down_read_killable(&task->signal->exec_update_lock);
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
...@@ -637,7 +637,7 @@ static struct file *__pidfd_fget(struct task_struct *task, int fd) ...@@ -637,7 +637,7 @@ static struct file *__pidfd_fget(struct task_struct *task, int fd)
else else
file = ERR_PTR(-EPERM); file = ERR_PTR(-EPERM);
mutex_unlock(&task->signal->exec_update_mutex); up_read(&task->signal->exec_update_lock);
return file ?: ERR_PTR(-EBADF); return file ?: ERR_PTR(-EBADF);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册