提交 54595fe2 编写于 作者: K KAMEZAWA Hiroyuki 提交者: Linus Torvalds

memcg: use css_tryget in memcg

From:KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

css_tryget() newly is added and we can know css is alive or not and get
refcnt of css in very safe way.  ("alive" here means "rmdir/destroy" is
not called.)

This patch replaces css_get() to css_tryget(), where I cannot explain
why css_get() is safe. And removes memcg->obsolete flag.
Reviewed-by: NDaisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: NKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
上级 a7ba0eef
...@@ -162,7 +162,6 @@ struct mem_cgroup { ...@@ -162,7 +162,6 @@ struct mem_cgroup {
*/ */
bool use_hierarchy; bool use_hierarchy;
unsigned long last_oom_jiffies; unsigned long last_oom_jiffies;
int obsolete;
atomic_t refcnt; atomic_t refcnt;
unsigned int swappiness; unsigned int swappiness;
...@@ -283,6 +282,31 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) ...@@ -283,6 +282,31 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
struct mem_cgroup, css); struct mem_cgroup, css);
} }
static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
{
struct mem_cgroup *mem = NULL;
/*
* Because we have no locks, mm->owner's may be being moved to other
* cgroup. We use css_tryget() here even if this looks
* pessimistic (rather than adding locks here).
*/
rcu_read_lock();
do {
mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
if (unlikely(!mem))
break;
} while (!css_tryget(&mem->css));
rcu_read_unlock();
return mem;
}
static bool mem_cgroup_is_obsolete(struct mem_cgroup *mem)
{
if (!mem)
return true;
return css_is_removed(&mem->css);
}
/* /*
* Following LRU functions are allowed to be used without PCG_LOCK. * Following LRU functions are allowed to be used without PCG_LOCK.
* Operations are called by routine of global LRU independently from memcg. * Operations are called by routine of global LRU independently from memcg.
...@@ -622,8 +646,9 @@ mem_cgroup_get_first_node(struct mem_cgroup *root_mem) ...@@ -622,8 +646,9 @@ mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
{ {
struct cgroup *cgroup; struct cgroup *cgroup;
struct mem_cgroup *ret; struct mem_cgroup *ret;
bool obsolete = (root_mem->last_scanned_child && bool obsolete;
root_mem->last_scanned_child->obsolete);
obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child);
/* /*
* Scan all children under the mem_cgroup mem * Scan all children under the mem_cgroup mem
...@@ -636,7 +661,7 @@ mem_cgroup_get_first_node(struct mem_cgroup *root_mem) ...@@ -636,7 +661,7 @@ mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
if (!root_mem->last_scanned_child || obsolete) { if (!root_mem->last_scanned_child || obsolete) {
if (obsolete) if (obsolete && root_mem->last_scanned_child)
mem_cgroup_put(root_mem->last_scanned_child); mem_cgroup_put(root_mem->last_scanned_child);
cgroup = list_first_entry(&root_mem->css.cgroup->children, cgroup = list_first_entry(&root_mem->css.cgroup->children,
...@@ -711,7 +736,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, ...@@ -711,7 +736,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
next_mem = mem_cgroup_get_first_node(root_mem); next_mem = mem_cgroup_get_first_node(root_mem);
while (next_mem != root_mem) { while (next_mem != root_mem) {
if (next_mem->obsolete) { if (mem_cgroup_is_obsolete(next_mem)) {
mem_cgroup_put(next_mem); mem_cgroup_put(next_mem);
cgroup_lock(); cgroup_lock();
next_mem = mem_cgroup_get_first_node(root_mem); next_mem = mem_cgroup_get_first_node(root_mem);
...@@ -769,23 +794,17 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, ...@@ -769,23 +794,17 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
* thread group leader migrates. It's possible that mm is not * thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage). * set, if so charge the init_mm (happens for pagecache usage).
*/ */
if (likely(!*memcg)) { mem = *memcg;
rcu_read_lock(); if (likely(!mem)) {
mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); mem = try_get_mem_cgroup_from_mm(mm);
if (unlikely(!mem)) {
rcu_read_unlock();
return 0;
}
/*
* For every charge from the cgroup, increment reference count
*/
css_get(&mem->css);
*memcg = mem; *memcg = mem;
rcu_read_unlock();
} else { } else {
mem = *memcg;
css_get(&mem->css); css_get(&mem->css);
} }
if (unlikely(!mem))
return 0;
VM_BUG_ON(mem_cgroup_is_obsolete(mem));
while (1) { while (1) {
int ret; int ret;
...@@ -1072,12 +1091,19 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, ...@@ -1072,12 +1091,19 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
} }
/*
* While swap-in, try_charge -> commit or cancel, the page is locked.
* And when try_charge() successfully returns, one refcnt to memcg without
* struct page_cgroup is aquired. This refcnt will be cumsumed by
* "commit()" or removed by "cancel()"
*/
int mem_cgroup_try_charge_swapin(struct mm_struct *mm, int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
struct page *page, struct page *page,
gfp_t mask, struct mem_cgroup **ptr) gfp_t mask, struct mem_cgroup **ptr)
{ {
struct mem_cgroup *mem; struct mem_cgroup *mem;
swp_entry_t ent; swp_entry_t ent;
int ret;
if (mem_cgroup_disabled()) if (mem_cgroup_disabled())
return 0; return 0;
...@@ -1096,10 +1122,15 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, ...@@ -1096,10 +1122,15 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
ent.val = page_private(page); ent.val = page_private(page);
mem = lookup_swap_cgroup(ent); mem = lookup_swap_cgroup(ent);
if (!mem || mem->obsolete) if (!mem)
goto charge_cur_mm;
if (!css_tryget(&mem->css))
goto charge_cur_mm; goto charge_cur_mm;
*ptr = mem; *ptr = mem;
return __mem_cgroup_try_charge(NULL, mask, ptr, true); ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
/* drop extra refcnt from tryget */
css_put(&mem->css);
return ret;
charge_cur_mm: charge_cur_mm:
if (unlikely(!mm)) if (unlikely(!mm))
mm = &init_mm; mm = &init_mm;
...@@ -1130,13 +1161,18 @@ int mem_cgroup_cache_charge_swapin(struct page *page, ...@@ -1130,13 +1161,18 @@ int mem_cgroup_cache_charge_swapin(struct page *page,
ent.val = page_private(page); ent.val = page_private(page);
if (do_swap_account) { if (do_swap_account) {
mem = lookup_swap_cgroup(ent); mem = lookup_swap_cgroup(ent);
if (mem && mem->obsolete) if (mem) {
mem = NULL; if (css_tryget(&mem->css))
if (mem) mm = NULL; /* charge to recorded */
mm = NULL; else
mem = NULL; /* charge to current */
}
} }
ret = mem_cgroup_charge_common(page, mm, mask, ret = mem_cgroup_charge_common(page, mm, mask,
MEM_CGROUP_CHARGE_TYPE_SHMEM, mem); MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
/* drop extra refcnt from tryget */
if (mem)
css_put(&mem->css);
if (!ret && do_swap_account) { if (!ret && do_swap_account) {
/* avoid double counting */ /* avoid double counting */
...@@ -1178,7 +1214,6 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) ...@@ -1178,7 +1214,6 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
memcg = swap_cgroup_record(ent, NULL); memcg = swap_cgroup_record(ent, NULL);
if (memcg) { if (memcg) {
/* If memcg is obsolete, memcg can be != ptr */
res_counter_uncharge(&memcg->memsw, PAGE_SIZE); res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_put(memcg); mem_cgroup_put(memcg);
} }
...@@ -1421,14 +1456,9 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) ...@@ -1421,14 +1456,9 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
if (!mm) if (!mm)
return 0; return 0;
rcu_read_lock(); mem = try_get_mem_cgroup_from_mm(mm);
mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); if (unlikely(!mem))
if (unlikely(!mem)) {
rcu_read_unlock();
return 0; return 0;
}
css_get(&mem->css);
rcu_read_unlock();
do { do {
progress = mem_cgroup_hierarchical_reclaim(mem, gfp_mask, true); progress = mem_cgroup_hierarchical_reclaim(mem, gfp_mask, true);
...@@ -2086,9 +2116,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void) ...@@ -2086,9 +2116,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
* the number of reference from swap_cgroup and free mem_cgroup when * the number of reference from swap_cgroup and free mem_cgroup when
* it goes down to 0. * it goes down to 0.
* *
* When mem_cgroup is destroyed, mem->obsolete will be set to 0 and
* entry which points to this memcg will be ignore at swapin.
*
* Removal of cgroup itself succeeds regardless of refs from swap. * Removal of cgroup itself succeeds regardless of refs from swap.
*/ */
...@@ -2174,7 +2201,6 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, ...@@ -2174,7 +2201,6 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
struct cgroup *cont) struct cgroup *cont)
{ {
struct mem_cgroup *mem = mem_cgroup_from_cont(cont); struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
mem->obsolete = 1;
mem_cgroup_force_empty(mem, false); mem_cgroup_force_empty(mem, false);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册