提交 7d6e1f54 编写于 作者: S Sha Zhengju 提交者: Sage Weil

ceph: use vfs __set_page_dirty_nobuffers interface instead of doing it inside filesystem

In the following patches we will begin to add memcg dirty page accounting around
__set_page_dirty_{buffers,nobuffers} in the vfs layer, so we had better use the vfs
interface here to avoid exporting those details to filesystems.

Since vfs set_page_dirty() should be called under the page lock, we no longer need
elaborate code here to handle races, and two WARN_ON()s are added to detect such exceptions.
Thanks very much to Sage and Yan Zheng for their coaching!

I tested it in a two-server ceph environment, where one server is the client and the
other is mds/osd/mon, and ran the following fsx tests from xfstests:

  ./fsx   1MB -N 50000 -p 10000 -l 1048576
  ./fsx  10MB -N 50000 -p 10000 -l 10485760
  ./fsx 100MB -N 50000 -p 10000 -l 104857600

fsx performs many mmap-read/mmap-write/truncate operations, and the tests completed
successfully without triggering any of the WARN_ON()s.
Signed-off-by: Sha Zhengju <handai.szj@taobao.com>
Reviewed-by: Sage Weil <sage@inktank.com>
上级 ee7289bf
...@@ -70,15 +70,16 @@ static int ceph_set_page_dirty(struct page *page) ...@@ -70,15 +70,16 @@ static int ceph_set_page_dirty(struct page *page)
struct address_space *mapping = page->mapping; struct address_space *mapping = page->mapping;
struct inode *inode; struct inode *inode;
struct ceph_inode_info *ci; struct ceph_inode_info *ci;
int undo = 0;
struct ceph_snap_context *snapc; struct ceph_snap_context *snapc;
int ret;
if (unlikely(!mapping)) if (unlikely(!mapping))
return !TestSetPageDirty(page); return !TestSetPageDirty(page);
if (TestSetPageDirty(page)) { if (PageDirty(page)) {
dout("%p set_page_dirty %p idx %lu -- already dirty\n", dout("%p set_page_dirty %p idx %lu -- already dirty\n",
mapping->host, page, page->index); mapping->host, page, page->index);
BUG_ON(!PagePrivate(page));
return 0; return 0;
} }
...@@ -107,35 +108,19 @@ static int ceph_set_page_dirty(struct page *page) ...@@ -107,35 +108,19 @@ static int ceph_set_page_dirty(struct page *page)
snapc, snapc->seq, snapc->num_snaps); snapc, snapc->seq, snapc->num_snaps);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
/* now adjust page */ /*
spin_lock_irq(&mapping->tree_lock); * Reference snap context in page->private. Also set
if (page->mapping) { /* Race with truncate? */ * PagePrivate so that we get invalidatepage callback.
WARN_ON_ONCE(!PageUptodate(page)); */
account_page_dirtied(page, page->mapping); BUG_ON(PagePrivate(page));
radix_tree_tag_set(&mapping->page_tree, page->private = (unsigned long)snapc;
page_index(page), PAGECACHE_TAG_DIRTY); SetPagePrivate(page);
/*
* Reference snap context in page->private. Also set
* PagePrivate so that we get invalidatepage callback.
*/
page->private = (unsigned long)snapc;
SetPagePrivate(page);
} else {
dout("ANON set_page_dirty %p (raced truncate?)\n", page);
undo = 1;
}
spin_unlock_irq(&mapping->tree_lock);
if (undo)
/* whoops, we failed to dirty the page */
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES); ret = __set_page_dirty_nobuffers(page);
WARN_ON(!PageLocked(page));
WARN_ON(!page->mapping);
BUG_ON(!PageDirty(page)); return ret;
return 1;
} }
/* /*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册