Commit c2b42d3c, authored by Tejun Heo, committed by Linus Torvalds

memcg: convert mem_cgroup->under_oom from atomic_t to int

memcg->under_oom tracks whether the memcg is under OOM conditions and is
an atomic_t counter managed with mem_cgroup_[un]mark_under_oom().  While
atomic_t appears simple synchronization-wise, using it as a
synchronization construct, as is done here, is trickier and more
error-prone due to weak memory ordering rules, especially around
atomic_read(), and gives a false sense of security.
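
As a minimal illustration of the hazard (a hypothetical sketch, not code
from this patch): in the kernel's memory model, non-value-returning
atomics such as atomic_inc() and plain atomic_read() imply no memory
barriers, so a reader can observe the counter as set without yet
observing writes that preceded the increment.

	/* Hypothetical example -- not from this patch. */
	static atomic_t flag;
	static int payload;

	static void writer(void)
	{
		payload = 1;		/* plain store */
		atomic_inc(&flag);	/* implies no barrier before or after */
	}

	static void reader(void)
	{
		if (atomic_read(&flag))	/* implies no barrier either */
			/*
			 * Can fire on a weakly ordered CPU: the store to
			 * 'payload' may not be visible here yet.
			 */
			WARN_ON(READ_ONCE(payload) != 1);
	}

A spinlock sidesteps this: its acquire/release semantics order the
protected accesses, which is why serializing the writers with
memcg_oom_lock makes the reasoning simpler.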

For example, both non-trivial read sites of memcg->under_oom are a bit
problematic, although not actually broken.

* mem_cgroup_oom_register_event()

  It isn't explicit what guarantees the memory ordering between the
  event addition and the memcg->under_oom check.  This isn't broken
  only because memcg_oom_lock is used for both the event list and
  memcg->oom_lock.

* memcg_oom_recover()

  The lockless test doesn't come with any explanation of why it is
  safe (see the call-sequence sketch after this list).
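
For the memcg_oom_recover() site, the ordering that actually makes the
lockless test safe can be sketched as the following call sequence
(simplified and hypothetical, based on this file's OOM notification
path, not verbatim kernel code):

	/* OOM side (cf. mem_cgroup_oom_synchronize()): */
	mem_cgroup_mark_under_oom(memcg);	/* asserts ->under_oom */
	mem_cgroup_oom_notify(memcg);		/* signals eventfd waiters */

	/*
	 * Userland side, reacting to the notification it just received:
	 * the lockless ->under_oom test in memcg_oom_recover() is
	 * guaranteed to see the state asserted above, because the marking
	 * happened before the notification was sent.
	 */
	memcg_oom_recover(memcg);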

mem_cgroup_[un]mark_under_oom() are very cold paths and there's no point
in avoiding locking memcg_oom_lock there.  This patch converts
memcg->under_oom from atomic_t to int, puts its modifications under
memcg_oom_lock, and documents why the lockless test in
memcg_oom_recover() is safe.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Parent f4b90b70
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -285,8 +285,9 @@ struct mem_cgroup {
 	 */
 	bool use_hierarchy;
 
+	/* protected by memcg_oom_lock */
 	bool oom_lock;
-	atomic_t under_oom;
+	int under_oom;
 
 	int swappiness;
 	/* OOM-Killer disable */
@@ -1809,8 +1810,10 @@ static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *iter;
 
+	spin_lock(&memcg_oom_lock);
 	for_each_mem_cgroup_tree(iter, memcg)
-		atomic_inc(&iter->under_oom);
+		iter->under_oom++;
+	spin_unlock(&memcg_oom_lock);
 }
 
 static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg)
@@ -1819,11 +1822,13 @@ static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg)
 
 	/*
 	 * When a new child is created while the hierarchy is under oom,
-	 * mem_cgroup_oom_lock() may not be called. We have to use
-	 * atomic_add_unless() here.
+	 * mem_cgroup_oom_lock() may not be called. Watch for underflow.
 	 */
+	spin_lock(&memcg_oom_lock);
 	for_each_mem_cgroup_tree(iter, memcg)
-		atomic_add_unless(&iter->under_oom, -1, 0);
+		if (iter->under_oom > 0)
+			iter->under_oom--;
+	spin_unlock(&memcg_oom_lock);
 }
 
 static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
@@ -1851,7 +1856,15 @@ static int memcg_oom_wake_function(wait_queue_t *wait,
 
 static void memcg_oom_recover(struct mem_cgroup *memcg)
 {
-	if (memcg && atomic_read(&memcg->under_oom))
+	/*
+	 * For the following lockless ->under_oom test, the only required
+	 * guarantee is that it must see the state asserted by an OOM when
+	 * this function is called as a result of userland actions
+	 * triggered by the notification of the OOM.  This is trivially
+	 * achieved by invoking mem_cgroup_mark_under_oom() before
+	 * triggering notification.
+	 */
+	if (memcg && memcg->under_oom)
 		__wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
 }
@@ -3860,7 +3873,7 @@ static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg,
 	list_add(&event->list, &memcg->oom_notify);
 
 	/* already in OOM ? */
-	if (atomic_read(&memcg->under_oom))
+	if (memcg->under_oom)
 		eventfd_signal(eventfd, 1);
 	spin_unlock(&memcg_oom_lock);
@@ -3889,7 +3902,7 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(sf));
 
 	seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
-	seq_printf(sf, "under_oom %d\n", (bool)atomic_read(&memcg->under_oom));
+	seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
 
 	return 0;
 }