提交 600a45e1 编写于 作者: M Miao Xie 提交者: David Sterba

Btrfs: fix deadlock on page lock when doing auto-defragment

When I ran xfstests in a loop on a btrfs filesystem mounted with autodefrag,
a deadlock occurred.

Steps to reproduce:
[tty0]
 # export MOUNT_OPTIONS="-o autodefrag"
 # export TEST_DEV=<partition1>
 # export TEST_DIR=<mountpoint1>
 # export SCRATCH_DEV=<partition2>
 # export SCRATCH_MNT=<mountpoint2>
 # while [ 1 ]
 > do
 > ./check 091 127 263
 > sleep 1
 > done
[tty1]
 # while [ 1 ]
 > do
 > echo 3 > /proc/sys/vm/drop_caches
 > done

Several hours later, the test processes hang, deadlocked on a page lock.

The reason is that:
  Auto defrag task		Flush thread			Test task
				btrfs_writepages()
				  add ordered extent
				  (including page 1, 2)
				  set page 1 writeback
				  set page 2 writeback
				endio_fn()
				  end page 2 writeback
								release page 2
lock page 1
alloc and lock page 2
page 2 is not uptodate
  btrfs_readpage()
    start ordered extent()
    btrfs_writepages()
      try to lock page 1

so deadlock happens.

Fix this bug by unlocking the page which is in writeback, and re-locking it
after the writeback ends.
Signed-off-by: Miao Xie <miax@cn.fujitsu.com>
上级 013bd4c3
...@@ -862,6 +862,7 @@ static int cluster_pages_for_defrag(struct inode *inode, ...@@ -862,6 +862,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
int i_done; int i_done;
struct btrfs_ordered_extent *ordered; struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL; struct extent_state *cached_state = NULL;
struct extent_io_tree *tree;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
if (isize == 0) if (isize == 0)
...@@ -872,18 +873,34 @@ static int cluster_pages_for_defrag(struct inode *inode, ...@@ -872,18 +873,34 @@ static int cluster_pages_for_defrag(struct inode *inode,
num_pages << PAGE_CACHE_SHIFT); num_pages << PAGE_CACHE_SHIFT);
if (ret) if (ret)
return ret; return ret;
again:
ret = 0;
i_done = 0; i_done = 0;
tree = &BTRFS_I(inode)->io_tree;
/* step one, lock all the pages */ /* step one, lock all the pages */
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
struct page *page; struct page *page;
again:
page = find_or_create_page(inode->i_mapping, page = find_or_create_page(inode->i_mapping,
start_index + i, mask); start_index + i, mask);
if (!page) if (!page)
break; break;
page_start = page_offset(page);
page_end = page_start + PAGE_CACHE_SIZE - 1;
while (1) {
lock_extent(tree, page_start, page_end, GFP_NOFS);
ordered = btrfs_lookup_ordered_extent(inode,
page_start);
unlock_extent(tree, page_start, page_end, GFP_NOFS);
if (!ordered)
break;
unlock_page(page);
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
lock_page(page);
}
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
btrfs_readpage(NULL, page); btrfs_readpage(NULL, page);
lock_page(page); lock_page(page);
...@@ -894,15 +911,22 @@ static int cluster_pages_for_defrag(struct inode *inode, ...@@ -894,15 +911,22 @@ static int cluster_pages_for_defrag(struct inode *inode,
break; break;
} }
} }
isize = i_size_read(inode); isize = i_size_read(inode);
file_end = (isize - 1) >> PAGE_CACHE_SHIFT; file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
if (!isize || page->index > file_end || if (!isize || page->index > file_end) {
page->mapping != inode->i_mapping) {
/* whoops, we blew past eof, skip this page */ /* whoops, we blew past eof, skip this page */
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
break; break;
} }
if (page->mapping != inode->i_mapping) {
unlock_page(page);
page_cache_release(page);
goto again;
}
pages[i] = page; pages[i] = page;
i_done++; i_done++;
} }
...@@ -925,25 +949,6 @@ static int cluster_pages_for_defrag(struct inode *inode, ...@@ -925,25 +949,6 @@ static int cluster_pages_for_defrag(struct inode *inode,
lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_extent_bits(&BTRFS_I(inode)->io_tree,
page_start, page_end - 1, 0, &cached_state, page_start, page_end - 1, 0, &cached_state,
GFP_NOFS); GFP_NOFS);
ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1);
if (ordered &&
ordered->file_offset + ordered->len > page_start &&
ordered->file_offset < page_end) {
btrfs_put_ordered_extent(ordered);
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
page_start, page_end - 1,
&cached_state, GFP_NOFS);
for (i = 0; i < i_done; i++) {
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
btrfs_wait_ordered_range(inode, page_start,
page_end - page_start);
goto again;
}
if (ordered)
btrfs_put_ordered_extent(ordered);
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册