提交 a569469e 编写于 作者: J Jin Xu 提交者: Jaegeuk Kim

f2fs: fix a deadlock in fsync

This patch fixes a deadlock bug that occurs quite often when there are
concurrent writes and fsyncs on the same file.

Following is the simplified call trace when tasks get hung.

fsync thread:
- f2fs_sync_file
 ...
 - f2fs_write_data_pages
 ...
  - update_extent_cache
  ...
   - update_inode
    - wait_on_page_writeback

bdi writeback thread
- __writeback_single_inode
 - f2fs_write_data_pages
  - mutex_lock(sbi->writepages)

The deadlock happens when the fsync thread waits on an inode page that has
been added to f2fs's cached bio, sbi->bio[NODE], and unfortunately no one
else is able to submit the cached bio to the block layer for writeback.
This is because the fsync thread already holds an sbi->fs_lock and the
sbi->writepages lock, causing the bdi thread to block when it attempts to
write data pages for the same inode. At the same time, the f2fs_gc thread
does not notice the situation and cannot help. Even the sync syscall
gets blocked.

To fix it, we could submit the cached bio first before waiting on an inode
page that is being written back.
Signed-off-by: Jin Xu <jinuxstyle@gmail.com>
[Jaegeuk Kim: add more cases to use f2fs_wait_on_page_writeback]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
上级 df273efc
...@@ -37,7 +37,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) ...@@ -37,7 +37,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
struct page *node_page = dn->node_page; struct page *node_page = dn->node_page;
unsigned int ofs_in_node = dn->ofs_in_node; unsigned int ofs_in_node = dn->ofs_in_node;
wait_on_page_writeback(node_page); f2fs_wait_on_page_writeback(node_page, NODE, false);
rn = F2FS_NODE(node_page); rn = F2FS_NODE(node_page);
......
...@@ -1023,7 +1023,8 @@ int npages_for_summary_flush(struct f2fs_sb_info *); ...@@ -1023,7 +1023,8 @@ int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
struct bio *f2fs_bio_alloc(struct block_device *, int); struct bio *f2fs_bio_alloc(struct block_device *, int);
void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync); void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool);
void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
void write_meta_page(struct f2fs_sb_info *, struct page *); void write_meta_page(struct f2fs_sb_info *, struct page *);
void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int,
block_t, block_t *); block_t, block_t *);
......
...@@ -422,8 +422,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, ...@@ -422,8 +422,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
/* set page dirty and write it */ /* set page dirty and write it */
if (gc_type == FG_GC) { if (gc_type == FG_GC) {
f2fs_submit_bio(sbi, NODE, true); f2fs_wait_on_page_writeback(node_page, NODE, true);
wait_on_page_writeback(node_page);
set_page_dirty(node_page); set_page_dirty(node_page);
} else { } else {
if (!PageWriteback(node_page)) if (!PageWriteback(node_page))
...@@ -523,10 +522,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) ...@@ -523,10 +522,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
} else { } else {
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
if (PageWriteback(page)) { f2fs_wait_on_page_writeback(page, DATA, true);
f2fs_submit_bio(sbi, DATA, true);
wait_on_page_writeback(page);
}
if (clear_page_dirty_for_io(page) && if (clear_page_dirty_for_io(page) &&
S_ISDIR(inode->i_mode)) { S_ISDIR(inode->i_mode)) {
......
...@@ -151,7 +151,7 @@ void update_inode(struct inode *inode, struct page *node_page) ...@@ -151,7 +151,7 @@ void update_inode(struct inode *inode, struct page *node_page)
struct f2fs_node *rn; struct f2fs_node *rn;
struct f2fs_inode *ri; struct f2fs_inode *ri;
wait_on_page_writeback(node_page); f2fs_wait_on_page_writeback(node_page, NODE, false);
rn = F2FS_NODE(node_page); rn = F2FS_NODE(node_page);
ri = &(rn->i); ri = &(rn->i);
......
...@@ -705,6 +705,16 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, ...@@ -705,6 +705,16 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
trace_f2fs_submit_write_page(page, blk_addr, type); trace_f2fs_submit_write_page(page, blk_addr, type);
} }
/*
 * Wait for writeback on @page to finish, submitting f2fs's cached bio of
 * the given @type first.
 *
 * @page: page to wait on; its mapping's host inode is used to find the
 *        f2fs superblock info.
 * @type: page type forwarded to f2fs_submit_bio().
 * @sync: forwarded unchanged to f2fs_submit_bio().
 *
 * Nothing is submitted or waited on unless the page is currently under
 * writeback. The submit must precede the wait: the page may have been added
 * to a cached bio that no other thread is able to submit, in which case
 * waiting first would never complete.
 */
void f2fs_wait_on_page_writeback(struct page *page,
				enum page_type type, bool sync)
{
	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);

	/* Page is not being written back: there is nothing to wait for. */
	if (!PageWriteback(page))
		return;

	f2fs_submit_bio(sbi, type, sync);
	wait_on_page_writeback(page);
}
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{ {
struct curseg_info *curseg = CURSEG_I(sbi, type); struct curseg_info *curseg = CURSEG_I(sbi, type);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册