commit bb21c7ce  Author: KOSAKI Motohiro  Committer: Linus Torvalds

vmscan: fix do_try_to_free_pages() return value when priority==0 reclaim failure

Greg Thelen reported that Johannes's recent stack diet patch makes the
kernel hang.  His test case is as follows:

  mount -t cgroup none /cgroups -o memory
  mkdir /cgroups/cg1
  echo $$ > /cgroups/cg1/tasks
  dd bs=1024 count=1024 if=/dev/null of=/data/foo
  echo $$ > /cgroups/tasks
  echo 1 > /cgroups/cg1/memory.force_empty

In fact, this try-hard-before-OOM logic has been broken since the
following two-year-old patch:

	commit a41f24ea
	Author: Nishanth Aravamudan <nacc@us.ibm.com>
	Date:   Tue Apr 29 00:58:25 2008 -0700

	    page allocator: smarter retry of costly-order allocations

The original intention was "return success if the system has shrinkable
zones even though the priority==0 reclaim pass failed".  But the above
patch changed it to "return nr_reclaimed if .....", overlooking that
nr_reclaimed may be 0 when priority==0 reclaim fails.
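For illustration only, here is a minimal C sketch of that intended
return-value contract; reclaim_result() is a made-up helper, not kernel
code, and simply mirrors the logic this patch restores:

	/*
	 * Illustrative sketch only, not actual kernel code: the intended
	 * return-value contract of do_try_to_free_pages().
	 */
	static unsigned long reclaim_result(unsigned long nr_reclaimed,
					    bool all_unreclaimable)
	{
		if (nr_reclaimed)
			return nr_reclaimed;	/* real progress was made */
		if (!all_unreclaimable)
			return 1;	/* freed nothing, but don't OOM yet */
		return 0;		/* nothing left to reclaim */
	}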

Johannes's patch 0aeb2339 ("vmscan: remove all_unreclaimable scan
control") then broke it further.  Originally, a priority==0 reclaim
failure on memcg returned 0, but that patch changed it to return 1,
which totally confused memcg.
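To see why this confuses memcg, consider a simplified, hypothetical
retry loop in the style of the memory.force_empty path; memcg_usage()
and memcg_reclaim() are made-up stand-ins, not real memcg functions:

	/* Hypothetical sketch of a force_empty-style retry loop. */
	while (memcg_usage(memcg)) {
		unsigned long progress = memcg_reclaim(memcg);

		if (!progress)
			break;	/* give up instead of spinning forever */
		/*
		 * If a failed priority==0 reclaim returns 1 instead of 0,
		 * "progress" is never zero, the loop never exits, and the
		 * task hangs -- the behaviour Greg reported above.
		 */
	}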

This patch fixes it completely.
Reported-by: Greg Thelen <gthelen@google.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: Greg Thelen <gthelen@google.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 9e506f7a
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1724,13 +1724,13 @@ static void shrink_zone(int priority, struct zone *zone,
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
  */
-static int shrink_zones(int priority, struct zonelist *zonelist,
+static bool shrink_zones(int priority, struct zonelist *zonelist,
 					struct scan_control *sc)
 {
 	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
 	struct zoneref *z;
 	struct zone *zone;
-	int progress = 0;
+	bool all_unreclaimable = true;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
 					sc->nodemask) {
@@ -1757,9 +1757,9 @@ static int shrink_zones(int priority, struct zonelist *zonelist,
 		}
 
 		shrink_zone(priority, zone, sc);
-		progress = 1;
+		all_unreclaimable = false;
 	}
-	return progress;
+	return all_unreclaimable;
 }
 
 /*
@@ -1782,7 +1782,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 					struct scan_control *sc)
 {
 	int priority;
-	unsigned long ret = 0;
+	bool all_unreclaimable;
 	unsigned long total_scanned = 0;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long lru_pages = 0;
@@ -1813,7 +1813,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		sc->nr_scanned = 0;
 		if (!priority)
 			disable_swap_token();
-		ret = shrink_zones(priority, zonelist, sc);
+		all_unreclaimable = shrink_zones(priority, zonelist, sc);
 		/*
 		 * Don't shrink slabs when reclaiming memory from
 		 * over limit cgroups
@@ -1826,10 +1826,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 			}
 		}
 		total_scanned += sc->nr_scanned;
-		if (sc->nr_reclaimed >= sc->nr_to_reclaim) {
-			ret = sc->nr_reclaimed;
+		if (sc->nr_reclaimed >= sc->nr_to_reclaim)
 			goto out;
-		}
 
 		/*
 		 * Try to write back as many pages as we just scanned. This
@@ -1849,9 +1847,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		    priority < DEF_PRIORITY - 2)
 			congestion_wait(BLK_RW_ASYNC, HZ/10);
 	}
-	/* top priority shrink_zones still had more to do? don't OOM, then */
-	if (ret && scanning_global_lru(sc))
-		ret = sc->nr_reclaimed;
+
 out:
 	/*
 	 * Now that we've scanned all the zones at this priority level, note
@@ -1877,7 +1873,14 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	delayacct_freepages_end();
 	put_mems_allowed();
 
-	return ret;
+	if (sc->nr_reclaimed)
+		return sc->nr_reclaimed;
+
+	/* top priority shrink_zones still had more to do? don't OOM, then */
+	if (scanning_global_lru(sc) && !all_unreclaimable)
+		return 1;
+
+	return 0;
 }
 
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,