diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index b01490b71f32837cc3b3ab58331fcd9c032a5114..170cc5bc7da2b46682091d1b67ce78f20c1a1aa6 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -2212,6 +2213,45 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) } EXPORT_SYMBOL_GPL(task_cgroup_path); +/** + * cgroup_attach_lock - Lock for ->attach() + * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem + * + * cgroup migration sometimes needs to stabilize threadgroups against forks and + * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach() + * implementations (e.g. cpuset), also need to disable CPU hotplug. + * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can + * lead to deadlocks. + * + * Bringing up a CPU may involve creating and destroying tasks which requires + * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside + * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while + * write-locking threadgroup_rwsem, the locking order is reversed and we end up + * waiting for an on-going CPU hotplug operation which in turn is waiting for + * the threadgroup_rwsem to be released to create new tasks. For more details: + * + * http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu + * + * Resolve the situation by always acquiring cpus_read_lock() before optionally + * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that + * CPU hotplug is disabled on entry. + */ +static void cgroup_attach_lock(void) +{ + get_online_cpus(); + percpu_down_write(&cgroup_threadgroup_rwsem); +} + +/** + * cgroup_attach_unlock - Undo cgroup_attach_lock() + * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem + */ +static void cgroup_attach_unlock(void) +{ + percpu_up_write(&cgroup_threadgroup_rwsem); + put_online_cpus(); +} + /** * cgroup_migrate_add_task - add a migration target task to a migration context * @task: target task @@ -2691,7 +2731,7 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup) if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) return ERR_PTR(-EINVAL); - percpu_down_write(&cgroup_threadgroup_rwsem); + cgroup_attach_lock(); rcu_read_lock(); if (pid) { @@ -2722,7 +2762,7 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup) goto out_unlock_rcu; out_unlock_threadgroup: - percpu_up_write(&cgroup_threadgroup_rwsem); + cgroup_attach_unlock(); out_unlock_rcu: rcu_read_unlock(); return tsk; @@ -2737,7 +2777,7 @@ void cgroup_procs_write_finish(struct task_struct *task) /* release reference from cgroup_procs_write_start() */ put_task_struct(task); - percpu_up_write(&cgroup_threadgroup_rwsem); + cgroup_attach_unlock(); for_each_subsys(ss, ssid) if (ss->post_attach) ss->post_attach(); @@ -2818,7 +2858,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) lockdep_assert_held(&cgroup_mutex); - percpu_down_write(&cgroup_threadgroup_rwsem); + cgroup_attach_lock(); /* look up all csses currently attached to @cgrp's subtree */ spin_lock_irq(&css_set_lock); @@ -2848,7 +2888,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) ret = cgroup_migrate_execute(&mgctx); out_finish: cgroup_migrate_finish(&mgctx); - percpu_up_write(&cgroup_threadgroup_rwsem); + cgroup_attach_unlock(); return ret; } diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index def36c3fc52447d36982d72453de7be087245abd..1d13d64108a00589343073d9ed60451ecebbb2a1 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1612,11 +1612,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); cs = css_cs(css); - /* - * It should hold cpus lock because a cpu offline event can - * cause set_cpus_allowed_ptr() failed. - */ - get_online_cpus(); + lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */ mutex_lock(&cpuset_mutex); /* prepare for attach */ @@ -1679,7 +1675,6 @@ static void cpuset_attach(struct cgroup_taskset *tset) wake_up(&cpuset_attach_wq); mutex_unlock(&cpuset_mutex); - put_online_cpus(); } /* The various types of files and directories in a cpuset file system */