提交 6b523df4 编写于 作者: J Jan Kara 提交者: Theodore Ts'o

ext4: use transaction reservation for extent conversion in ext4_end_io

Later we would like to clear the PageWriteback bit only after the
conversion from unwritten to written extents has been performed.  However,
it is not possible to start a transaction after PageWriteback is set
because that violates lock ordering (and can easily deadlock).  So we
have to reserve a transaction before locking pages and sending them
for IO, and later use that reserved transaction for the extent
conversion from ext4_end_io().
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
上级 3613d228
...@@ -184,10 +184,13 @@ struct ext4_map_blocks { ...@@ -184,10 +184,13 @@ struct ext4_map_blocks {
#define EXT4_IO_END_DIRECT 0x0004 #define EXT4_IO_END_DIRECT 0x0004
/* /*
* For converting uninitialized extents on a work queue. * For converting uninitialized extents on a work queue. 'handle' is used for
* buffered writeback.
*/ */
typedef struct ext4_io_end { typedef struct ext4_io_end {
struct list_head list; /* per-file finished IO list */ struct list_head list; /* per-file finished IO list */
handle_t *handle; /* handle reserved for extent
* conversion */
struct inode *inode; /* file being written to */ struct inode *inode; /* file being written to */
unsigned int flag; /* unwritten or not */ unsigned int flag; /* unwritten or not */
loff_t offset; /* offset in the file */ loff_t offset; /* offset in the file */
...@@ -1322,6 +1325,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, ...@@ -1322,6 +1325,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
struct ext4_io_end *io_end) struct ext4_io_end *io_end)
{ {
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
/* Writeback has to have conversion transaction reserved */
WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle &&
!(io_end->flag & EXT4_IO_END_DIRECT));
io_end->flag |= EXT4_IO_END_UNWRITTEN; io_end->flag |= EXT4_IO_END_UNWRITTEN;
atomic_inc(&EXT4_I(inode)->i_unwritten); atomic_inc(&EXT4_I(inode)->i_unwritten);
} }
...@@ -2591,8 +2597,8 @@ extern void ext4_ext_init(struct super_block *); ...@@ -2591,8 +2597,8 @@ extern void ext4_ext_init(struct super_block *);
extern void ext4_ext_release(struct super_block *); extern void ext4_ext_release(struct super_block *);
extern long ext4_fallocate(struct file *file, int mode, loff_t offset, extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
loff_t len); loff_t len);
extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
ssize_t len); loff_t offset, ssize_t len);
extern int ext4_map_blocks(handle_t *handle, struct inode *inode, extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags); struct ext4_map_blocks *map, int flags);
extern int ext4_ext_calc_metadata_amount(struct inode *inode, extern int ext4_ext_calc_metadata_amount(struct inode *inode,
......
...@@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode) ...@@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode)
#define EXT4_HT_MIGRATE 8 #define EXT4_HT_MIGRATE 8
#define EXT4_HT_MOVE_EXTENTS 9 #define EXT4_HT_MOVE_EXTENTS 9
#define EXT4_HT_XATTR 10 #define EXT4_HT_XATTR 10
#define EXT4_HT_MAX 11 #define EXT4_HT_EXT_CONVERT 11
#define EXT4_HT_MAX 12
/** /**
* struct ext4_journal_cb_entry - Base structure for callback information. * struct ext4_journal_cb_entry - Base structure for callback information.
...@@ -319,7 +320,7 @@ static inline handle_t *__ext4_journal_start(struct inode *inode, ...@@ -319,7 +320,7 @@ static inline handle_t *__ext4_journal_start(struct inode *inode,
#define ext4_journal_stop(handle) \ #define ext4_journal_stop(handle) \
__ext4_journal_stop(__func__, __LINE__, (handle)) __ext4_journal_stop(__func__, __LINE__, (handle))
#define ext4_journal_start_reserve(handle, type) \ #define ext4_journal_start_reserved(handle, type) \
__ext4_journal_start_reserved((handle), __LINE__, (type)) __ext4_journal_start_reserved((handle), __LINE__, (type))
handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
......
...@@ -4566,10 +4566,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ...@@ -4566,10 +4566,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
* function, to convert the fallocated extents after IO is completed. * function, to convert the fallocated extents after IO is completed.
* Returns 0 on success. * Returns 0 on success.
*/ */
int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
ssize_t len) loff_t offset, ssize_t len)
{ {
handle_t *handle;
unsigned int max_blocks; unsigned int max_blocks;
int ret = 0; int ret = 0;
int ret2 = 0; int ret2 = 0;
...@@ -4584,16 +4583,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, ...@@ -4584,16 +4583,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
map.m_lblk); map.m_lblk);
/* /*
* credits to insert 1 extent into extent tree * This is somewhat ugly but the idea is clear: When transaction is
* reserved, everything goes into it. Otherwise we rather start several
* smaller transactions for conversion of each extent separately.
*/ */
credits = ext4_chunk_trans_blocks(inode, max_blocks); if (handle) {
handle = ext4_journal_start_reserved(handle,
EXT4_HT_EXT_CONVERT);
if (IS_ERR(handle))
return PTR_ERR(handle);
credits = 0;
} else {
/*
* credits to insert 1 extent into extent tree
*/
credits = ext4_chunk_trans_blocks(inode, max_blocks);
}
while (ret >= 0 && ret < max_blocks) { while (ret >= 0 && ret < max_blocks) {
map.m_lblk += ret; map.m_lblk += ret;
map.m_len = (max_blocks -= ret); map.m_len = (max_blocks -= ret);
handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); if (credits) {
if (IS_ERR(handle)) { handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
ret = PTR_ERR(handle); credits);
break; if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
break;
}
} }
ret = ext4_map_blocks(handle, inode, &map, ret = ext4_map_blocks(handle, inode, &map,
EXT4_GET_BLOCKS_IO_CONVERT_EXT); EXT4_GET_BLOCKS_IO_CONVERT_EXT);
...@@ -4604,10 +4619,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, ...@@ -4604,10 +4619,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
inode->i_ino, map.m_lblk, inode->i_ino, map.m_lblk,
map.m_len, ret); map.m_len, ret);
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle); if (credits)
if (ret <= 0 || ret2 ) ret2 = ext4_journal_stop(handle);
if (ret <= 0 || ret2)
break; break;
} }
if (!credits)
ret2 = ext4_journal_stop(handle);
return ret > 0 ? ret2 : ret; return ret > 0 ? ret2 : ret;
} }
......
...@@ -1410,6 +1410,7 @@ static void ext4_da_page_release_reservation(struct page *page, ...@@ -1410,6 +1410,7 @@ static void ext4_da_page_release_reservation(struct page *page,
struct mpage_da_data { struct mpage_da_data {
struct inode *inode; struct inode *inode;
struct writeback_control *wbc; struct writeback_control *wbc;
pgoff_t first_page; /* The first page to write */ pgoff_t first_page; /* The first page to write */
pgoff_t next_page; /* Current page to examine */ pgoff_t next_page; /* Current page to examine */
pgoff_t last_page; /* Last page to examine */ pgoff_t last_page; /* Last page to examine */
...@@ -2108,8 +2109,14 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) ...@@ -2108,8 +2109,14 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
err = ext4_map_blocks(handle, inode, map, get_blocks_flags); err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
if (err < 0) if (err < 0)
return err; return err;
if (map->m_flags & EXT4_MAP_UNINIT) if (map->m_flags & EXT4_MAP_UNINIT) {
if (!mpd->io_submit.io_end->handle &&
ext4_handle_valid(handle)) {
mpd->io_submit.io_end->handle = handle->h_rsv_handle;
handle->h_rsv_handle = NULL;
}
ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end);
}
BUG_ON(map->m_len == 0); BUG_ON(map->m_len == 0);
if (map->m_flags & EXT4_MAP_NEW) { if (map->m_flags & EXT4_MAP_NEW) {
...@@ -2351,7 +2358,7 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2351,7 +2358,7 @@ static int ext4_da_writepages(struct address_space *mapping,
handle_t *handle = NULL; handle_t *handle = NULL;
struct mpage_da_data mpd; struct mpage_da_data mpd;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
int needed_blocks, ret = 0; int needed_blocks, rsv_blocks = 0, ret = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
bool done; bool done;
struct blk_plug plug; struct blk_plug plug;
...@@ -2379,6 +2386,14 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2379,6 +2386,14 @@ static int ext4_da_writepages(struct address_space *mapping,
if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
return -EROFS; return -EROFS;
if (ext4_should_dioread_nolock(inode)) {
/*
* We may need to convert up to one extent per block in
* the page and we may dirty the inode.
*/
rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
}
/* /*
* If we have inline data and arrive here, it means that * If we have inline data and arrive here, it means that
* we will soon create the block for the 1st page, so * we will soon create the block for the 1st page, so
...@@ -2438,8 +2453,8 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2438,8 +2453,8 @@ static int ext4_da_writepages(struct address_space *mapping,
needed_blocks = ext4_da_writepages_trans_blocks(inode); needed_blocks = ext4_da_writepages_trans_blocks(inode);
/* start a new transaction */ /* start a new transaction */
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, handle = ext4_journal_start_with_reserve(inode,
needed_blocks); EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
ret = PTR_ERR(handle); ret = PTR_ERR(handle);
ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
...@@ -3120,7 +3135,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ...@@ -3120,7 +3135,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
* for non AIO case, since the IO is already * for non AIO case, since the IO is already
* completed, we could do the conversion right here * completed, we could do the conversion right here
*/ */
err = ext4_convert_unwritten_extents(inode, err = ext4_convert_unwritten_extents(NULL, inode,
offset, ret); offset, ret);
if (err < 0) if (err < 0)
ret = err; ret = err;
......
...@@ -66,6 +66,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end) ...@@ -66,6 +66,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
{ {
BUG_ON(!list_empty(&io_end->list)); BUG_ON(!list_empty(&io_end->list));
BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
WARN_ON(io_end->handle);
if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
wake_up_all(ext4_ioend_wq(io_end->inode)); wake_up_all(ext4_ioend_wq(io_end->inode));
...@@ -92,13 +93,15 @@ static int ext4_end_io(ext4_io_end_t *io) ...@@ -92,13 +93,15 @@ static int ext4_end_io(ext4_io_end_t *io)
struct inode *inode = io->inode; struct inode *inode = io->inode;
loff_t offset = io->offset; loff_t offset = io->offset;
ssize_t size = io->size; ssize_t size = io->size;
handle_t *handle = io->handle;
int ret = 0; int ret = 0;
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
"list->prev 0x%p\n", "list->prev 0x%p\n",
io, inode->i_ino, io->list.next, io->list.prev); io, inode->i_ino, io->list.next, io->list.prev);
ret = ext4_convert_unwritten_extents(inode, offset, size); io->handle = NULL; /* Following call will use up the handle */
ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
if (ret < 0) { if (ret < 0) {
ext4_msg(inode->i_sb, KERN_EMERG, ext4_msg(inode->i_sb, KERN_EMERG,
"failed to convert unwritten extents to written " "failed to convert unwritten extents to written "
...@@ -228,8 +231,10 @@ int ext4_put_io_end(ext4_io_end_t *io_end) ...@@ -228,8 +231,10 @@ int ext4_put_io_end(ext4_io_end_t *io_end)
if (atomic_dec_and_test(&io_end->count)) { if (atomic_dec_and_test(&io_end->count)) {
if (io_end->flag & EXT4_IO_END_UNWRITTEN) { if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
err = ext4_convert_unwritten_extents(io_end->inode, err = ext4_convert_unwritten_extents(io_end->handle,
io_end->offset, io_end->size); io_end->inode, io_end->offset,
io_end->size);
io_end->handle = NULL;
ext4_clear_io_unwritten_flag(io_end); ext4_clear_io_unwritten_flag(io_end);
} }
ext4_release_io_end(io_end); ext4_release_io_end(io_end);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册