提交 acf92b48 编写于 作者: D Dave Chinner 提交者: Al Viro

vmscan: shrinker->nr updates race and go wrong

shrink_slab() allows shrinkers to be called in parallel so the
struct shrinker can be updated concurrently. It does not provide any
exclusion for such updates, so we can get the shrinker->nr value
increasing or decreasing incorrectly.

As a result, when a shrinker repeatedly returns a value of -1 (e.g.
a VFS shrinker called w/ GFP_NOFS), the shrinker->nr goes haywire,
sometimes updating with the scan count that wasn't used, sometimes
losing it altogether. Worse is when a shrinker does work and that
update is lost due to racy updates, which means the shrinker will do
the work again!

Fix this by making the total_scan calculations independent of
shrinker->nr, and making the shrinker->nr updates atomic w.r.t.
other updates via cmpxchg loops.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
上级 09576073
...@@ -251,17 +251,29 @@ unsigned long shrink_slab(struct shrink_control *shrink, ...@@ -251,17 +251,29 @@ unsigned long shrink_slab(struct shrink_control *shrink,
unsigned long total_scan; unsigned long total_scan;
unsigned long max_pass; unsigned long max_pass;
int shrink_ret = 0; int shrink_ret = 0;
long nr;
long new_nr;
/*
* copy the current shrinker scan count into a local variable
* and zero it so that other concurrent shrinker invocations
* don't also do this scanning work.
*/
do {
nr = shrinker->nr;
} while (cmpxchg(&shrinker->nr, nr, 0) != nr);
total_scan = nr;
max_pass = do_shrinker_shrink(shrinker, shrink, 0); max_pass = do_shrinker_shrink(shrinker, shrink, 0);
delta = (4 * nr_pages_scanned) / shrinker->seeks; delta = (4 * nr_pages_scanned) / shrinker->seeks;
delta *= max_pass; delta *= max_pass;
do_div(delta, lru_pages + 1); do_div(delta, lru_pages + 1);
shrinker->nr += delta; total_scan += delta;
if (shrinker->nr < 0) { if (total_scan < 0) {
printk(KERN_ERR "shrink_slab: %pF negative objects to " printk(KERN_ERR "shrink_slab: %pF negative objects to "
"delete nr=%ld\n", "delete nr=%ld\n",
shrinker->shrink, shrinker->nr); shrinker->shrink, total_scan);
shrinker->nr = max_pass; total_scan = max_pass;
} }
/* /*
...@@ -269,13 +281,10 @@ unsigned long shrink_slab(struct shrink_control *shrink, ...@@ -269,13 +281,10 @@ unsigned long shrink_slab(struct shrink_control *shrink,
* never try to free more than twice the estimate number of * never try to free more than twice the estimate number of
* freeable entries. * freeable entries.
*/ */
if (shrinker->nr > max_pass * 2) if (total_scan > max_pass * 2)
shrinker->nr = max_pass * 2; total_scan = max_pass * 2;
total_scan = shrinker->nr;
shrinker->nr = 0;
trace_mm_shrink_slab_start(shrinker, shrink, total_scan, trace_mm_shrink_slab_start(shrinker, shrink, nr,
nr_pages_scanned, lru_pages, nr_pages_scanned, lru_pages,
max_pass, delta, total_scan); max_pass, delta, total_scan);
...@@ -296,9 +305,19 @@ unsigned long shrink_slab(struct shrink_control *shrink, ...@@ -296,9 +305,19 @@ unsigned long shrink_slab(struct shrink_control *shrink,
cond_resched(); cond_resched();
} }
shrinker->nr += total_scan; /*
trace_mm_shrink_slab_end(shrinker, shrink_ret, total_scan, * move the unused scan count back into the shrinker in a
shrinker->nr); * manner that handles concurrent updates. If we exhausted the
* scan, there is no need to do an update.
*/
do {
nr = shrinker->nr;
new_nr = total_scan + nr;
if (total_scan <= 0)
break;
} while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
} }
up_read(&shrinker_rwsem); up_read(&shrinker_rwsem);
out: out:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册