Commit c4d6d8db authored by David Howells

CacheFiles: Fix the marking of cached pages

Under some circumstances CacheFiles defers the marking of pages with PG_fscache
so that it can take advantage of pagevecs, reducing the number of calls both to
fscache_mark_pages_cached() and to the netfs's hook that keeps track of this.

There are, however, two problems with this:

 (1) It can lead to the PG_fscache mark being applied _after_ the page is set
     PG_uptodate and unlocked (by the call to fscache_end_io()).

 (2) CacheFiles's ref on the page is dropped immediately following
     fscache_end_io() - and so may not still be held when the mark is applied.
     This can lead to the page being passed back to the allocator before the
     mark is applied.

Fix this by, where appropriate, marking the page before calling
fscache_end_io() and releasing the page.  This means that we can't take
advantage of pagevecs and instead have to make a separate call to the marking
routines for each page.
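
To make the new ordering concrete, here is a minimal sketch of the pattern the
read-completion paths are converted to (the helper name complete_netfs_read is
invented for illustration; the three calls are the ones that appear in the diff
below):

#include <linux/fscache-cache.h>
#include <linux/pagemap.h>

/* Sketch only: mark the page while the cache still holds its own
 * reference on it, and only then complete the netfs read and drop
 * that reference. */
static void complete_netfs_read(struct fscache_retrieval *op,
				struct page *netpage)
{
	fscache_mark_page_cached(op, netpage);	/* set PG_fscache first */
	fscache_end_io(op, netpage, 0);		/* page becomes PG_uptodate and is unlocked */
	page_cache_release(netpage);		/* page may now be reclaimed */
}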

The symptoms of this are "Bad page state" errors cropping up under memory
pressure, for example:

BUG: Bad page state in process tar  pfn:002da
page:ffffea0000009fb0 count:0 mapcount:0 mapping:          (null) index:0x1447
page flags: 0x1000(private_2)
Pid: 4574, comm: tar Tainted: G        W   3.1.0-rc4-fsdevel+ #1064
Call Trace:
 [<ffffffff8109583c>] ? dump_page+0xb9/0xbe
 [<ffffffff81095916>] bad_page+0xd5/0xea
 [<ffffffff81095d82>] get_page_from_freelist+0x35b/0x46a
 [<ffffffff810961f3>] __alloc_pages_nodemask+0x362/0x662
 [<ffffffff810989da>] __do_page_cache_readahead+0x13a/0x267
 [<ffffffff81098942>] ? __do_page_cache_readahead+0xa2/0x267
 [<ffffffff81098d7b>] ra_submit+0x1c/0x20
 [<ffffffff8109900a>] ondemand_readahead+0x28b/0x29a
 [<ffffffff81098ee2>] ? ondemand_readahead+0x163/0x29a
 [<ffffffff810990ce>] page_cache_sync_readahead+0x38/0x3a
 [<ffffffff81091d8a>] generic_file_aio_read+0x2ab/0x67e
 [<ffffffffa008cfbe>] nfs_file_read+0xa4/0xc9 [nfs]
 [<ffffffff810c22c4>] do_sync_read+0xba/0xfa
 [<ffffffff81177a47>] ? security_file_permission+0x7b/0x84
 [<ffffffff810c25dd>] ? rw_verify_area+0xab/0xc8
 [<ffffffff810c29a4>] vfs_read+0xaa/0x13a
 [<ffffffff810c2a79>] sys_read+0x45/0x6c
 [<ffffffff813ac37b>] system_call_fastpath+0x16/0x1b

As can be seen, PG_private_2 (== PG_fscache) is set in the page flags.

Instrumenting fscache_mark_pages_cached() to verify whether page->mapping was
set appropriately showed that sometimes it wasn't.  This led to the discovery
that the page had sometimes already been reclaimed by the time the marker got
to see it.
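
A minimal sketch of the kind of instrumentation described above (hypothetical,
not part of the final patch; the function name is invented):

#include <linux/kernel.h>
#include <linux/mm.h>

/* Hypothetical debug check: complain if a page reaches the marking code
 * with no ->mapping, i.e. after it has apparently been reclaimed. */
static void fscache_check_page_before_mark(struct page *page)
{
	if (!page->mapping)
		printk(KERN_WARNING
		       "FS-Cache: marking page %p{%lx} with no mapping\n",
		       page, page->index);
}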

Reported-by: M. Stevens <m@tippett.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Parent 18000985
@@ -176,9 +176,8 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
 recheck:
 	if (PageUptodate(monitor->back_page)) {
 		copy_highpage(monitor->netfs_page, monitor->back_page);
-
-		pagevec_add(&pagevec, monitor->netfs_page);
-		fscache_mark_pages_cached(monitor->op, &pagevec);
+		fscache_mark_page_cached(monitor->op,
+					 monitor->netfs_page);
 		error = 0;
 	} else if (!PageError(monitor->back_page)) {
 		/* the page has probably been truncated */
@@ -335,8 +334,7 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
 backing_page_already_uptodate:
 	_debug("- uptodate");
 
-	pagevec_add(pagevec, netpage);
-	fscache_mark_pages_cached(op, pagevec);
+	fscache_mark_page_cached(op, netpage);
 
 	copy_highpage(netpage, backpage);
 	fscache_end_io(op, netpage, 0);
@@ -448,8 +446,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 						       &pagevec);
 	} else if (cachefiles_has_space(cache, 0, 1) == 0) {
 		/* there's space in the cache we can use */
-		pagevec_add(&pagevec, page);
-		fscache_mark_pages_cached(op, &pagevec);
+		fscache_mark_page_cached(op, page);
 		ret = -ENODATA;
 	} else {
 		ret = -ENOBUFS;
@@ -465,8 +462,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
  */
 static int cachefiles_read_backing_file(struct cachefiles_object *object,
 					struct fscache_retrieval *op,
-					struct list_head *list,
-					struct pagevec *mark_pvec)
+					struct list_head *list)
 {
 	struct cachefiles_one_read *monitor = NULL;
 	struct address_space *bmapping = object->backer->d_inode->i_mapping;
@@ -626,13 +622,13 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 		page_cache_release(backpage);
 		backpage = NULL;
 
-		if (!pagevec_add(mark_pvec, netpage))
-			fscache_mark_pages_cached(op, mark_pvec);
+		fscache_mark_page_cached(op, netpage);
 
 		page_cache_get(netpage);
 		if (!pagevec_add(&lru_pvec, netpage))
 			__pagevec_lru_add_file(&lru_pvec);
 
+		/* the netpage is unlocked and marked up to date here */
 		fscache_end_io(op, netpage, 0);
 		page_cache_release(netpage);
 		netpage = NULL;
@@ -775,15 +771,11 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 	/* submit the apparently valid pages to the backing fs to be read from
 	 * disk */
 	if (nrbackpages > 0) {
-		ret2 = cachefiles_read_backing_file(object, op, &backpages,
-						    &pagevec);
+		ret2 = cachefiles_read_backing_file(object, op, &backpages);
 		if (ret2 == -ENOMEM || ret2 == -EINTR)
 			ret = ret2;
 	}
 
-	if (pagevec_count(&pagevec) > 0)
-		fscache_mark_pages_cached(op, &pagevec);
-
 	_leave(" = %d [nr=%u%s]",
 	       ret, *nr_pages, list_empty(pages) ? " empty" : "");
 	return ret;
@@ -806,7 +798,6 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,
 {
 	struct cachefiles_object *object;
 	struct cachefiles_cache *cache;
-	struct pagevec pagevec;
 	int ret;
 
 	object = container_of(op->op.object,
@@ -817,13 +808,10 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,
 	_enter("%p,{%lx},", object, page->index);
 
 	ret = cachefiles_has_space(cache, 0, 1);
-	if (ret == 0) {
-		pagevec_init(&pagevec, 0);
-		pagevec_add(&pagevec, page);
-		fscache_mark_pages_cached(op, &pagevec);
-	} else {
+	if (ret == 0)
+		fscache_mark_page_cached(op, page);
+	else
 		ret = -ENOBUFS;
-	}
 
 	_leave(" = %d", ret);
 	return ret;
...
@@ -914,6 +914,40 @@ void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page)
 }
 EXPORT_SYMBOL(__fscache_uncache_page);
 
+/**
+ * fscache_mark_page_cached - Mark a page as being cached
+ * @op: The retrieval op pages are being marked for
+ * @page: The page to be marked
+ *
+ * Mark a netfs page as being cached.  After this is called, the netfs
+ * must call fscache_uncache_page() to remove the mark.
+ */
+void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page)
+{
+	struct fscache_cookie *cookie = op->op.object->cookie;
+
+#ifdef CONFIG_FSCACHE_STATS
+	atomic_inc(&fscache_n_marks);
+#endif
+
+	_debug("- mark %p{%lx}", page, page->index);
+	if (TestSetPageFsCache(page)) {
+		static bool once_only;
+		if (!once_only) {
+			once_only = true;
+			printk(KERN_WARNING "FS-Cache:"
+			       " Cookie type %s marked page %lx"
+			       " multiple times\n",
+			       cookie->def->name, page->index);
+		}
+	}
+
+	if (cookie->def->mark_page_cached)
+		cookie->def->mark_page_cached(cookie->netfs_data,
+					      op->mapping, page);
+}
+EXPORT_SYMBOL(fscache_mark_page_cached);
+
 /**
  * fscache_mark_pages_cached - Mark pages as being cached
  * @op: The retrieval op pages are being marked for
@@ -925,32 +959,11 @@ EXPORT_SYMBOL(__fscache_uncache_page);
 void fscache_mark_pages_cached(struct fscache_retrieval *op,
 			       struct pagevec *pagevec)
 {
-	struct fscache_cookie *cookie = op->op.object->cookie;
 	unsigned long loop;
 
-#ifdef CONFIG_FSCACHE_STATS
-	atomic_add(pagevec->nr, &fscache_n_marks);
-#endif
-
-	for (loop = 0; loop < pagevec->nr; loop++) {
-		struct page *page = pagevec->pages[loop];
-
-		_debug("- mark %p{%lx}", page, page->index);
-		if (TestSetPageFsCache(page)) {
-			static bool once_only;
-			if (!once_only) {
-				once_only = true;
-				printk(KERN_WARNING "FS-Cache:"
-				       " Cookie type %s marked page %lx"
-				       " multiple times\n",
-				       cookie->def->name, page->index);
-			}
-		}
-	}
-
-	if (cookie->def->mark_pages_cached)
-		cookie->def->mark_pages_cached(cookie->netfs_data,
-					       op->mapping, pagevec);
+	for (loop = 0; loop < pagevec->nr; loop++)
+		fscache_mark_page_cached(op, pagevec->pages[loop]);
+
 	pagevec_reinit(pagevec);
 }
 EXPORT_SYMBOL(fscache_mark_pages_cached);
...
@@ -504,6 +504,9 @@ extern void fscache_withdraw_cache(struct fscache_cache *cache);
 
 extern void fscache_io_error(struct fscache_cache *cache);
 
+extern void fscache_mark_page_cached(struct fscache_retrieval *op,
+				     struct page *page);
+
 extern void fscache_mark_pages_cached(struct fscache_retrieval *op,
 				      struct pagevec *pagevec);
 
...
@@ -135,14 +135,14 @@ struct fscache_cookie_def {
 	 */
 	void (*put_context)(void *cookie_netfs_data, void *context);
 
-	/* indicate pages that now have cache metadata retained
-	 * - this function should mark the specified pages as now being cached
-	 * - the pages will have been marked with PG_fscache before this is
+	/* indicate page that now have cache metadata retained
+	 * - this function should mark the specified page as now being cached
+	 * - the page will have been marked with PG_fscache before this is
 	 *   called, so this is optional
 	 */
-	void (*mark_pages_cached)(void *cookie_netfs_data,
-				  struct address_space *mapping,
-				  struct pagevec *cached_pvec);
+	void (*mark_page_cached)(void *cookie_netfs_data,
+				 struct address_space *mapping,
+				 struct page *page);
 
 	/* indicate the cookie is no longer cached
 	 * - this function is called when the backing store currently caching
...
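
For reference, a hedged sketch of what a netfs-side implementation of the new
per-page hook might look like (everything named example_fs_* is hypothetical;
only the hook's signature and the fact that it is optional come from the
patch):

#include <linux/fscache.h>

/* Hypothetical netfs hook: the cache has already set PG_fscache on the
 * page before calling this, so the netfs only does its own bookkeeping
 * here - or leaves the hook NULL, since it is optional. */
static void example_fs_mark_page_cached(void *cookie_netfs_data,
					struct address_space *mapping,
					struct page *page)
{
	/* e.g. account the page against the inode's cached-page count */
}

static const struct fscache_cookie_def example_fs_data_cookie_def = {
	.name			= "example_fs.data",
	.type			= FSCACHE_COOKIE_TYPE_DATAFILE,
	.mark_page_cached	= example_fs_mark_page_cached,
};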