提交 f9d6d059 编写于 作者: C Chao Yu 提交者: Jaegeuk Kim

f2fs: fix out-place-update DIO write

In get_more_blocks(), we may override @create as below code:

	create = dio->op == REQ_OP_WRITE;
	if (dio->flags & DIO_SKIP_HOLES) {
		if (fs_startblk <= ((i_size_read(dio->inode) - 1) >>
						i_blkbits))
			create = 0;
	}

But in f2fs_map_blocks(), we only trigger f2fs_balance_fs() if @create
is 1, so in LFS mode, dio overwrite under LFS mode can easily run out
of free segments, result in below panic.

 Call Trace:
  allocate_segment_by_default+0xa8/0x270 [f2fs]
  f2fs_allocate_data_block+0x1ea/0x5c0 [f2fs]
  __allocate_data_block+0x306/0x480 [f2fs]
  f2fs_map_blocks+0x6f6/0x920 [f2fs]
  __get_data_block+0x4f/0xb0 [f2fs]
  get_data_block_dio_write+0x50/0x60 [f2fs]
  do_blockdev_direct_IO+0xcd5/0x21e0
  __blockdev_direct_IO+0x3a/0x3c
  f2fs_direct_IO+0x1ff/0x4a0 [f2fs]
  generic_file_direct_write+0xd9/0x160
  __generic_file_write_iter+0xbb/0x1e0
  f2fs_file_write_iter+0xaf/0x220 [f2fs]
  __vfs_write+0xd0/0x130
  vfs_write+0xb2/0x1b0
  SyS_pwrite64+0x69/0xa0
  ? vtime_user_exit+0x29/0x70
  do_syscall_64+0x6e/0x160
  entry_SYSCALL64_slow_path+0x25/0x25
 RIP: new_curseg+0x36f/0x380 [f2fs] RSP: ffffac570393f7a8

So this patch introduces a parameter map.m_may_create to indicate that
f2fs_map_blocks() is called from write or read path, which can give the
right hint to let f2fs_map_blocks() trigger OPU allocation and call
f2fs_balanc_fs() correctly.

BTW, it disables physical address preallocation for direct IO in
f2fs_preallocate_blocks, which is redundant to OPU allocation of
f2fs_map_blocks.
Signed-off-by: NChao Yu <yuchao0@huawei.com>
Signed-off-by: NJaegeuk Kim <jaegeuk@kernel.org>
上级 fef4129e
...@@ -957,6 +957,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) ...@@ -957,6 +957,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
return err; return err;
} }
if (direct_io && allow_outplace_dio(inode, iocb, from))
return 0;
if (is_inode_flag_set(inode, FI_NO_PREALLOC)) if (is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0; return 0;
...@@ -970,6 +973,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) ...@@ -970,6 +973,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
map.m_next_pgofs = NULL; map.m_next_pgofs = NULL;
map.m_next_extent = NULL; map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE; map.m_seg_type = NO_CHECK_TYPE;
map.m_may_create = true;
if (direct_io) { if (direct_io) {
map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
...@@ -1028,7 +1032,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, ...@@ -1028,7 +1032,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
unsigned int maxblocks = map->m_len; unsigned int maxblocks = map->m_len;
struct dnode_of_data dn; struct dnode_of_data dn;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int mode = create ? ALLOC_NODE : LOOKUP_NODE; int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
pgoff_t pgofs, end_offset, end; pgoff_t pgofs, end_offset, end;
int err = 0, ofs = 1; int err = 0, ofs = 1;
unsigned int ofs_in_node, last_ofs_in_node; unsigned int ofs_in_node, last_ofs_in_node;
...@@ -1062,7 +1066,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, ...@@ -1062,7 +1066,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
} }
next_dnode: next_dnode:
if (create) if (map->m_may_create)
__do_map_lock(sbi, flag, true); __do_map_lock(sbi, flag, true);
/* When reading holes, we need its node page */ /* When reading holes, we need its node page */
...@@ -1099,8 +1103,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, ...@@ -1099,8 +1103,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
if (is_valid_data_blkaddr(sbi, blkaddr)) { if (is_valid_data_blkaddr(sbi, blkaddr)) {
/* use out-place-update for driect IO under LFS mode */ /* use out-place-update for driect IO under LFS mode */
if (test_opt(sbi, LFS) && create && if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
flag == F2FS_GET_BLOCK_DIO) { map->m_may_create) {
err = __allocate_data_block(&dn, map->m_seg_type); err = __allocate_data_block(&dn, map->m_seg_type);
if (!err) if (!err)
set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_APPEND_WRITE);
...@@ -1209,7 +1213,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, ...@@ -1209,7 +1213,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
f2fs_put_dnode(&dn); f2fs_put_dnode(&dn);
if (create) { if (map->m_may_create) {
__do_map_lock(sbi, flag, false); __do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed); f2fs_balance_fs(sbi, dn.node_changed);
} }
...@@ -1235,7 +1239,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, ...@@ -1235,7 +1239,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
} }
f2fs_put_dnode(&dn); f2fs_put_dnode(&dn);
unlock_out: unlock_out:
if (create) { if (map->m_may_create) {
__do_map_lock(sbi, flag, false); __do_map_lock(sbi, flag, false);
f2fs_balance_fs(sbi, dn.node_changed); f2fs_balance_fs(sbi, dn.node_changed);
} }
...@@ -1271,7 +1275,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) ...@@ -1271,7 +1275,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
static int __get_data_block(struct inode *inode, sector_t iblock, static int __get_data_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create, int flag, struct buffer_head *bh, int create, int flag,
pgoff_t *next_pgofs, int seg_type) pgoff_t *next_pgofs, int seg_type, bool may_write)
{ {
struct f2fs_map_blocks map; struct f2fs_map_blocks map;
int err; int err;
...@@ -1281,6 +1285,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, ...@@ -1281,6 +1285,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
map.m_next_pgofs = next_pgofs; map.m_next_pgofs = next_pgofs;
map.m_next_extent = NULL; map.m_next_extent = NULL;
map.m_seg_type = seg_type; map.m_seg_type = seg_type;
map.m_may_create = may_write;
err = f2fs_map_blocks(inode, &map, create, flag); err = f2fs_map_blocks(inode, &map, create, flag);
if (!err) { if (!err) {
...@@ -1297,7 +1302,16 @@ static int get_data_block(struct inode *inode, sector_t iblock, ...@@ -1297,7 +1302,16 @@ static int get_data_block(struct inode *inode, sector_t iblock,
{ {
return __get_data_block(inode, iblock, bh_result, create, return __get_data_block(inode, iblock, bh_result, create,
flag, next_pgofs, flag, next_pgofs,
NO_CHECK_TYPE); NO_CHECK_TYPE, create);
}
static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_DIO, NULL,
f2fs_rw_hint_to_seg_type(inode->i_write_hint),
true);
} }
static int get_data_block_dio(struct inode *inode, sector_t iblock, static int get_data_block_dio(struct inode *inode, sector_t iblock,
...@@ -1305,8 +1319,8 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, ...@@ -1305,8 +1319,8 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
{ {
return __get_data_block(inode, iblock, bh_result, create, return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_DIO, NULL, F2FS_GET_BLOCK_DIO, NULL,
f2fs_rw_hint_to_seg_type( f2fs_rw_hint_to_seg_type(inode->i_write_hint),
inode->i_write_hint)); false);
} }
static int get_data_block_bmap(struct inode *inode, sector_t iblock, static int get_data_block_bmap(struct inode *inode, sector_t iblock,
...@@ -1318,7 +1332,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock, ...@@ -1318,7 +1332,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock,
return __get_data_block(inode, iblock, bh_result, create, return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_BMAP, NULL, F2FS_GET_BLOCK_BMAP, NULL,
NO_CHECK_TYPE); NO_CHECK_TYPE, create);
} }
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
...@@ -1525,6 +1539,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, ...@@ -1525,6 +1539,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
map.m_next_pgofs = NULL; map.m_next_pgofs = NULL;
map.m_next_extent = NULL; map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE; map.m_seg_type = NO_CHECK_TYPE;
map.m_may_create = false;
for (; nr_pages; nr_pages--) { for (; nr_pages; nr_pages--) {
if (pages) { if (pages) {
...@@ -2642,7 +2657,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ...@@ -2642,7 +2657,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
} }
err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
iter, get_data_block_dio, NULL, f2fs_dio_submit_bio, iter, rw == WRITE ? get_data_block_dio_write :
get_data_block_dio, NULL, f2fs_dio_submit_bio,
DIO_LOCKING | DIO_SKIP_HOLES); DIO_LOCKING | DIO_SKIP_HOLES);
if (do_opu) if (do_opu)
......
...@@ -602,6 +602,7 @@ struct f2fs_map_blocks { ...@@ -602,6 +602,7 @@ struct f2fs_map_blocks {
pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */
pgoff_t *m_next_extent; /* point to next possible extent */ pgoff_t *m_next_extent; /* point to next possible extent */
int m_seg_type; int m_seg_type;
bool m_may_create; /* indicate it is from write path */
}; };
/* for flag in get_data_block */ /* for flag in get_data_block */
......
...@@ -1499,7 +1499,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, ...@@ -1499,7 +1499,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
{ {
struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_map_blocks map = { .m_next_pgofs = NULL, struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE }; .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
.m_may_create = true };
pgoff_t pg_end; pgoff_t pg_end;
loff_t new_size = i_size_read(inode); loff_t new_size = i_size_read(inode);
loff_t off_end; loff_t off_end;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册