提交 a549984b 编写于 作者: T Theodore Ts'o

ext4: revert "ext4: use io_end for multiple bios"

This reverts commit 4eec708d.

Multiple users have reported crashes which are apparently caused by
this commit.  Thanks to Dmitry Monakhov for bisecting it.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Dmitry Monakhov <dmonakhov@openvz.org>
Cc: Jan Kara <jack@suse.cz>
上级 e6155736
...@@ -209,7 +209,6 @@ typedef struct ext4_io_end { ...@@ -209,7 +209,6 @@ typedef struct ext4_io_end {
ssize_t size; /* size of the extent */ ssize_t size; /* size of the extent */
struct kiocb *iocb; /* iocb struct for AIO */ struct kiocb *iocb; /* iocb struct for AIO */
int result; /* error value for AIO */ int result; /* error value for AIO */
atomic_t count; /* reference counter */
} ext4_io_end_t; } ext4_io_end_t;
struct ext4_io_submit { struct ext4_io_submit {
...@@ -2651,14 +2650,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, ...@@ -2651,14 +2650,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
/* page-io.c */ /* page-io.c */
extern int __init ext4_init_pageio(void); extern int __init ext4_init_pageio(void);
extern void ext4_add_complete_io(ext4_io_end_t *io_end);
extern void ext4_exit_pageio(void); extern void ext4_exit_pageio(void);
extern void ext4_ioend_shutdown(struct inode *); extern void ext4_ioend_shutdown(struct inode *);
extern void ext4_free_io_end(ext4_io_end_t *io);
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
extern int ext4_put_io_end(ext4_io_end_t *io_end);
extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
extern void ext4_io_submit_init(struct ext4_io_submit *io,
struct writeback_control *wbc);
extern void ext4_end_io_work(struct work_struct *work); extern void ext4_end_io_work(struct work_struct *work);
extern void ext4_io_submit(struct ext4_io_submit *io); extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io, extern int ext4_bio_write_page(struct ext4_io_submit *io,
......
...@@ -1487,10 +1487,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, ...@@ -1487,10 +1487,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
struct ext4_io_submit io_submit; struct ext4_io_submit io_submit;
BUG_ON(mpd->next_page <= mpd->first_page); BUG_ON(mpd->next_page <= mpd->first_page);
ext4_io_submit_init(&io_submit, mpd->wbc); memset(&io_submit, 0, sizeof(io_submit));
io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
if (!io_submit.io_end)
return -ENOMEM;
/* /*
* We need to start from the first_page to the next_page - 1 * We need to start from the first_page to the next_page - 1
* to make sure we also write the mapped dirty buffer_heads. * to make sure we also write the mapped dirty buffer_heads.
...@@ -1578,8 +1575,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, ...@@ -1578,8 +1575,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
pagevec_release(&pvec); pagevec_release(&pvec);
} }
ext4_io_submit(&io_submit); ext4_io_submit(&io_submit);
/* Drop io_end reference we got from init */
ext4_put_io_end_defer(io_submit.io_end);
return ret; return ret;
} }
...@@ -2238,16 +2233,9 @@ static int ext4_writepage(struct page *page, ...@@ -2238,16 +2233,9 @@ static int ext4_writepage(struct page *page,
*/ */
return __ext4_journalled_writepage(page, len); return __ext4_journalled_writepage(page, len);
ext4_io_submit_init(&io_submit, wbc); memset(&io_submit, 0, sizeof(io_submit));
io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
if (!io_submit.io_end) {
redirty_page_for_writepage(wbc, page);
return -ENOMEM;
}
ret = ext4_bio_write_page(&io_submit, page, len, wbc); ret = ext4_bio_write_page(&io_submit, page, len, wbc);
ext4_io_submit(&io_submit); ext4_io_submit(&io_submit);
/* Drop io_end reference we got from init */
ext4_put_io_end_defer(io_submit.io_end);
return ret; return ret;
} }
...@@ -3078,13 +3066,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ...@@ -3078,13 +3066,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
struct inode *inode = file_inode(iocb->ki_filp); struct inode *inode = file_inode(iocb->ki_filp);
ext4_io_end_t *io_end = iocb->private; ext4_io_end_t *io_end = iocb->private;
/* if not async direct IO just return */ /* if not async direct IO or dio with 0 bytes write, just return */
if (!io_end) { if (!io_end || !size)
inode_dio_done(inode); goto out;
if (is_async)
aio_complete(iocb, ret, 0);
return;
}
ext_debug("ext4_end_io_dio(): io_end 0x%p " ext_debug("ext4_end_io_dio(): io_end 0x%p "
"for inode %lu, iocb 0x%p, offset %llu, size %zd\n", "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
...@@ -3092,13 +3076,25 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ...@@ -3092,13 +3076,25 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
size); size);
iocb->private = NULL; iocb->private = NULL;
/* if not aio dio with unwritten extents, just free io and return */
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
ext4_free_io_end(io_end);
out:
inode_dio_done(inode);
if (is_async)
aio_complete(iocb, ret, 0);
return;
}
io_end->offset = offset; io_end->offset = offset;
io_end->size = size; io_end->size = size;
if (is_async) { if (is_async) {
io_end->iocb = iocb; io_end->iocb = iocb;
io_end->result = ret; io_end->result = ret;
} }
ext4_put_io_end_defer(io_end);
ext4_add_complete_io(io_end);
} }
/* /*
...@@ -3132,7 +3128,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ...@@ -3132,7 +3128,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
get_block_t *get_block_func = NULL; get_block_t *get_block_func = NULL;
int dio_flags = 0; int dio_flags = 0;
loff_t final_size = offset + count; loff_t final_size = offset + count;
ext4_io_end_t *io_end = NULL;
/* Use the old path for reads and writes beyond i_size. */ /* Use the old path for reads and writes beyond i_size. */
if (rw != WRITE || final_size > inode->i_size) if (rw != WRITE || final_size > inode->i_size)
...@@ -3171,16 +3166,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ...@@ -3171,16 +3166,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
iocb->private = NULL; iocb->private = NULL;
ext4_inode_aio_set(inode, NULL); ext4_inode_aio_set(inode, NULL);
if (!is_sync_kiocb(iocb)) { if (!is_sync_kiocb(iocb)) {
io_end = ext4_init_io_end(inode, GFP_NOFS); ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
if (!io_end) { if (!io_end) {
ret = -ENOMEM; ret = -ENOMEM;
goto retake_lock; goto retake_lock;
} }
io_end->flag |= EXT4_IO_END_DIRECT; io_end->flag |= EXT4_IO_END_DIRECT;
/* iocb->private = io_end;
* Grab reference for DIO. Will be dropped in ext4_end_io_dio()
*/
iocb->private = ext4_get_io_end(io_end);
/* /*
* we save the io structure for current async direct * we save the io structure for current async direct
* IO, so that later ext4_map_blocks() could flag the * IO, so that later ext4_map_blocks() could flag the
...@@ -3204,27 +3196,26 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ...@@ -3204,27 +3196,26 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
NULL, NULL,
dio_flags); dio_flags);
if (iocb->private)
ext4_inode_aio_set(inode, NULL);
/* /*
* Put our reference to io_end. This can free the io_end structure e.g. * The io_end structure takes a reference to the inode, that
* in sync IO case or in case of error. It can even perform extent * structure needs to be destroyed and the reference to the
* conversion if all bios we submitted finished before we got here. * inode need to be dropped, when IO is complete, even with 0
* Note that in that case iocb->private can be already set to NULL * byte write, or failed.
* here. *
* In the successful AIO DIO case, the io_end structure will
* be destroyed and the reference to the inode will be dropped
* after the end_io call back function is called.
*
* In the case there is 0 byte write, or error case, since VFS
* direct IO won't invoke the end_io call back function, we
* need to free the end_io structure here.
*/ */
if (io_end) { if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
ext4_inode_aio_set(inode, NULL); ext4_free_io_end(iocb->private);
ext4_put_io_end(io_end); iocb->private = NULL;
/* } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
* In case of error or no write ext4_end_io_dio() was not
* called so we have to put iocb's reference.
*/
if (ret <= 0 && ret != -EIOCBQUEUED) {
WARN_ON(iocb->private != io_end);
ext4_put_io_end(io_end);
iocb->private = NULL;
}
}
if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN)) { EXT4_STATE_DIO_UNWRITTEN)) {
int err; int err;
/* /*
......
...@@ -61,28 +61,15 @@ void ext4_ioend_shutdown(struct inode *inode) ...@@ -61,28 +61,15 @@ void ext4_ioend_shutdown(struct inode *inode)
cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
} }
static void ext4_release_io_end(ext4_io_end_t *io_end) void ext4_free_io_end(ext4_io_end_t *io)
{ {
BUG_ON(!list_empty(&io_end->list)); BUG_ON(!io);
BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); BUG_ON(!list_empty(&io->list));
BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
wake_up_all(ext4_ioend_wq(io_end->inode));
if (io_end->flag & EXT4_IO_END_DIRECT)
inode_dio_done(io_end->inode);
if (io_end->iocb)
aio_complete(io_end->iocb, io_end->result, 0);
kmem_cache_free(io_end_cachep, io_end);
}
static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
{
struct inode *inode = io_end->inode;
io_end->flag &= ~EXT4_IO_END_UNWRITTEN; if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
/* Wake up anyone waiting on unwritten extent conversion */ wake_up_all(ext4_ioend_wq(io->inode));
if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) kmem_cache_free(io_end_cachep, io);
wake_up_all(ext4_ioend_wq(inode));
} }
/* check a range of space and convert unwritten extents to written. */ /* check a range of space and convert unwritten extents to written. */
...@@ -105,8 +92,13 @@ static int ext4_end_io(ext4_io_end_t *io) ...@@ -105,8 +92,13 @@ static int ext4_end_io(ext4_io_end_t *io)
"(inode %lu, offset %llu, size %zd, error %d)", "(inode %lu, offset %llu, size %zd, error %d)",
inode->i_ino, offset, size, ret); inode->i_ino, offset, size, ret);
} }
ext4_clear_io_unwritten_flag(io); /* Wake up anyone waiting on unwritten extent conversion */
ext4_release_io_end(io); if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
wake_up_all(ext4_ioend_wq(inode));
if (io->flag & EXT4_IO_END_DIRECT)
inode_dio_done(inode);
if (io->iocb)
aio_complete(io->iocb, io->result, 0);
return ret; return ret;
} }
...@@ -137,7 +129,7 @@ static void dump_completed_IO(struct inode *inode) ...@@ -137,7 +129,7 @@ static void dump_completed_IO(struct inode *inode)
} }
/* Add the io_end to per-inode completed end_io list. */ /* Add the io_end to per-inode completed end_io list. */
static void ext4_add_complete_io(ext4_io_end_t *io_end) void ext4_add_complete_io(ext4_io_end_t *io_end)
{ {
struct ext4_inode_info *ei = EXT4_I(io_end->inode); struct ext4_inode_info *ei = EXT4_I(io_end->inode);
struct workqueue_struct *wq; struct workqueue_struct *wq;
...@@ -174,6 +166,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode) ...@@ -174,6 +166,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
err = ext4_end_io(io); err = ext4_end_io(io);
if (unlikely(!ret && err)) if (unlikely(!ret && err))
ret = err; ret = err;
io->flag &= ~EXT4_IO_END_UNWRITTEN;
ext4_free_io_end(io);
} }
return ret; return ret;
} }
...@@ -205,43 +199,10 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) ...@@ -205,43 +199,10 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
atomic_inc(&EXT4_I(inode)->i_ioend_count); atomic_inc(&EXT4_I(inode)->i_ioend_count);
io->inode = inode; io->inode = inode;
INIT_LIST_HEAD(&io->list); INIT_LIST_HEAD(&io->list);
atomic_set(&io->count, 1);
} }
return io; return io;
} }
void ext4_put_io_end_defer(ext4_io_end_t *io_end)
{
if (atomic_dec_and_test(&io_end->count)) {
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
ext4_release_io_end(io_end);
return;
}
ext4_add_complete_io(io_end);
}
}
int ext4_put_io_end(ext4_io_end_t *io_end)
{
int err = 0;
if (atomic_dec_and_test(&io_end->count)) {
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
err = ext4_convert_unwritten_extents(io_end->inode,
io_end->offset, io_end->size);
ext4_clear_io_unwritten_flag(io_end);
}
ext4_release_io_end(io_end);
}
return err;
}
ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
{
atomic_inc(&io_end->count);
return io_end;
}
/* /*
* Print an buffer I/O error compatible with the fs/buffer.c. This * Print an buffer I/O error compatible with the fs/buffer.c. This
* provides compatibility with dmesg scrapers that look for a specific * provides compatibility with dmesg scrapers that look for a specific
...@@ -324,7 +285,12 @@ static void ext4_end_bio(struct bio *bio, int error) ...@@ -324,7 +285,12 @@ static void ext4_end_bio(struct bio *bio, int error)
bi_sector >> (inode->i_blkbits - 9)); bi_sector >> (inode->i_blkbits - 9));
} }
ext4_put_io_end_defer(io_end); if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
ext4_free_io_end(io_end);
return;
}
ext4_add_complete_io(io_end);
} }
void ext4_io_submit(struct ext4_io_submit *io) void ext4_io_submit(struct ext4_io_submit *io)
...@@ -338,37 +304,40 @@ void ext4_io_submit(struct ext4_io_submit *io) ...@@ -338,37 +304,40 @@ void ext4_io_submit(struct ext4_io_submit *io)
bio_put(io->io_bio); bio_put(io->io_bio);
} }
io->io_bio = NULL; io->io_bio = NULL;
} io->io_op = 0;
void ext4_io_submit_init(struct ext4_io_submit *io,
struct writeback_control *wbc)
{
io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
io->io_bio = NULL;
io->io_end = NULL; io->io_end = NULL;
} }
static int io_submit_init_bio(struct ext4_io_submit *io, static int io_submit_init(struct ext4_io_submit *io,
struct buffer_head *bh) struct inode *inode,
struct writeback_control *wbc,
struct buffer_head *bh)
{ {
ext4_io_end_t *io_end;
struct page *page = bh->b_page;
int nvecs = bio_get_nr_vecs(bh->b_bdev); int nvecs = bio_get_nr_vecs(bh->b_bdev);
struct bio *bio; struct bio *bio;
io_end = ext4_init_io_end(inode, GFP_NOFS);
if (!io_end)
return -ENOMEM;
bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev; bio->bi_bdev = bh->b_bdev;
bio->bi_private = io->io_end = io_end;
bio->bi_end_io = ext4_end_bio; bio->bi_end_io = ext4_end_bio;
bio->bi_private = ext4_get_io_end(io->io_end);
if (!io->io_end->size) io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
+ bh_offset(bh);
io->io_bio = bio; io->io_bio = bio;
io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
io->io_next_block = bh->b_blocknr; io->io_next_block = bh->b_blocknr;
return 0; return 0;
} }
static int io_submit_add_bh(struct ext4_io_submit *io, static int io_submit_add_bh(struct ext4_io_submit *io,
struct inode *inode, struct inode *inode,
struct writeback_control *wbc,
struct buffer_head *bh) struct buffer_head *bh)
{ {
ext4_io_end_t *io_end; ext4_io_end_t *io_end;
...@@ -379,18 +348,18 @@ static int io_submit_add_bh(struct ext4_io_submit *io, ...@@ -379,18 +348,18 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
ext4_io_submit(io); ext4_io_submit(io);
} }
if (io->io_bio == NULL) { if (io->io_bio == NULL) {
ret = io_submit_init_bio(io, bh); ret = io_submit_init(io, inode, wbc, bh);
if (ret) if (ret)
return ret; return ret;
} }
ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
io_end = io->io_end; io_end = io->io_end;
if (test_clear_buffer_uninit(bh)) if (test_clear_buffer_uninit(bh))
ext4_set_io_unwritten_flag(inode, io_end); ext4_set_io_unwritten_flag(inode, io_end);
io_end->size += bh->b_size; io->io_end->size += bh->b_size;
io->io_next_block++; io->io_next_block++;
ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
return 0; return 0;
} }
...@@ -462,7 +431,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, ...@@ -462,7 +431,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do { do {
if (!buffer_async_write(bh)) if (!buffer_async_write(bh))
continue; continue;
ret = io_submit_add_bh(io, inode, bh); ret = io_submit_add_bh(io, inode, wbc, bh);
if (ret) { if (ret) {
/* /*
* We only get here on ENOMEM. Not much else * We only get here on ENOMEM. Not much else
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册