提交 146aa1bd 编写于 作者: L Lai Jiangshan 提交者: Linus Torvalds

cgroups: fix probable race with put_css_set[_taskexit] and find_css_set

put_css_set_taskexit may be called while find_css_set is being called on
another cpu, and the following race can occur:

put_css_set_taskexit side                    find_css_set side

                                        |
atomic_dec_and_test(&kref->refcount)    |
    /* kref->refcount = 0 */            |
....................................................................
                                        |  read_lock(&css_set_lock)
                                        |  find_existing_css_set
                                        |  get_css_set
                                        |  read_unlock(&css_set_lock);
....................................................................
__release_css_set                       |
....................................................................
                                        | /* use a released css_set */
                                        |

[put_css_set has the same problem. But in the current code, all put_css_set
calls are placed inside the same cgroup mutex critical region as find_css_set.]

[akpm@linux-foundation.org: repair comments]
[menage@google.com: eliminate race in css_set refcounting]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 248736c2
...@@ -9,7 +9,6 @@ ...@@ -9,7 +9,6 @@
*/ */
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/kref.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/nodemask.h> #include <linux/nodemask.h>
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
...@@ -149,7 +148,7 @@ struct cgroup { ...@@ -149,7 +148,7 @@ struct cgroup {
struct css_set { struct css_set {
/* Reference count */ /* Reference count */
struct kref ref; atomic_t refcount;
/* /*
* List running through all cgroup groups in the same hash * List running through all cgroup groups in the same hash
......
...@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg) ...@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg)
struct cg_cgroup_link *link; struct cg_cgroup_link *link;
struct cg_cgroup_link *saved_link; struct cg_cgroup_link *saved_link;
write_lock(&css_set_lock);
hlist_del(&cg->hlist); hlist_del(&cg->hlist);
css_set_count--; css_set_count--;
...@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg) ...@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg)
list_del(&link->cgrp_link_list); list_del(&link->cgrp_link_list);
kfree(link); kfree(link);
} }
write_unlock(&css_set_lock);
} }
static void __release_css_set(struct kref *k, int taskexit) static void __put_css_set(struct css_set *cg, int taskexit)
{ {
int i; int i;
struct css_set *cg = container_of(k, struct css_set, ref); /*
* Ensure that the refcount doesn't hit zero while any readers
* can see it. Similar to atomic_dec_and_lock(), but for an
* rwlock
*/
if (atomic_add_unless(&cg->refcount, -1, 1))
return;
write_lock(&css_set_lock);
if (!atomic_dec_and_test(&cg->refcount)) {
write_unlock(&css_set_lock);
return;
}
unlink_css_set(cg); unlink_css_set(cg);
write_unlock(&css_set_lock);
rcu_read_lock(); rcu_read_lock();
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
...@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit) ...@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit)
kfree(cg); kfree(cg);
} }
static void release_css_set(struct kref *k)
{
__release_css_set(k, 0);
}
static void release_css_set_taskexit(struct kref *k)
{
__release_css_set(k, 1);
}
/* /*
* refcounted get/put for css_set objects * refcounted get/put for css_set objects
*/ */
static inline void get_css_set(struct css_set *cg) static inline void get_css_set(struct css_set *cg)
{ {
kref_get(&cg->ref); atomic_inc(&cg->refcount);
} }
static inline void put_css_set(struct css_set *cg) static inline void put_css_set(struct css_set *cg)
{ {
kref_put(&cg->ref, release_css_set); __put_css_set(cg, 0);
} }
static inline void put_css_set_taskexit(struct css_set *cg) static inline void put_css_set_taskexit(struct css_set *cg)
{ {
kref_put(&cg->ref, release_css_set_taskexit); __put_css_set(cg, 1);
} }
/* /*
...@@ -427,7 +425,7 @@ static struct css_set *find_css_set( ...@@ -427,7 +425,7 @@ static struct css_set *find_css_set(
return NULL; return NULL;
} }
kref_init(&res->ref); atomic_set(&res->refcount, 1);
INIT_LIST_HEAD(&res->cg_links); INIT_LIST_HEAD(&res->cg_links);
INIT_LIST_HEAD(&res->tasks); INIT_LIST_HEAD(&res->tasks);
INIT_HLIST_NODE(&res->hlist); INIT_HLIST_NODE(&res->hlist);
...@@ -1728,7 +1726,7 @@ int cgroup_task_count(const struct cgroup *cgrp) ...@@ -1728,7 +1726,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
read_lock(&css_set_lock); read_lock(&css_set_lock);
list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
count += atomic_read(&link->cg->ref.refcount); count += atomic_read(&link->cg->refcount);
} }
read_unlock(&css_set_lock); read_unlock(&css_set_lock);
return count; return count;
...@@ -2495,8 +2493,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) ...@@ -2495,8 +2493,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
int __init cgroup_init_early(void) int __init cgroup_init_early(void)
{ {
int i; int i;
kref_init(&init_css_set.ref); atomic_set(&init_css_set.refcount, 1);
kref_get(&init_css_set.ref);
INIT_LIST_HEAD(&init_css_set.cg_links); INIT_LIST_HEAD(&init_css_set.cg_links);
INIT_LIST_HEAD(&init_css_set.tasks); INIT_LIST_HEAD(&init_css_set.tasks);
INIT_HLIST_NODE(&init_css_set.hlist); INIT_HLIST_NODE(&init_css_set.hlist);
......
...@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cont, ...@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
u64 count; u64 count;
rcu_read_lock(); rcu_read_lock();
count = atomic_read(&current->cgroups->ref.refcount); count = atomic_read(&current->cgroups->refcount);
rcu_read_unlock(); rcu_read_unlock();
return count; return count;
} }
...@@ -90,7 +90,7 @@ static struct cftype files[] = { ...@@ -90,7 +90,7 @@ static struct cftype files[] = {
{ {
.name = "releasable", .name = "releasable",
.read_u64 = releasable_read, .read_u64 = releasable_read,
} },
}; };
static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册