提交 fd801452 作者: Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  JBD/JBD2: free j_wbuf if journal init fails.
  ext3: Wait for proper transaction commit on fsync
  ext3: retry failed direct IO allocations
...@@ -46,19 +46,21 @@ ...@@ -46,19 +46,21 @@
int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
{ {
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
struct ext3_inode_info *ei = EXT3_I(inode);
journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
int ret = 0; int ret = 0;
tid_t commit_tid;
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
J_ASSERT(ext3_journal_current_handle() == NULL); J_ASSERT(ext3_journal_current_handle() == NULL);
/* /*
* data=writeback: * data=writeback,ordered:
* The caller's filemap_fdatawrite()/wait will sync the data. * The caller's filemap_fdatawrite()/wait will sync the data.
* sync_inode() will sync the metadata * Metadata is in the journal, we wait for a proper transaction
* * to commit here.
* data=ordered:
* The caller's filemap_fdatawrite() will write the data and
* sync_inode() will write the inode if it is dirty. Then the caller's
* filemap_fdatawait() will wait on the pages.
* *
* data=journal: * data=journal:
* filemap_fdatawrite won't do anything (the buffers are clean). * filemap_fdatawrite won't do anything (the buffers are clean).
...@@ -73,22 +75,16 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) ...@@ -73,22 +75,16 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
goto out; goto out;
} }
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) if (datasync)
goto flush; commit_tid = atomic_read(&ei->i_datasync_tid);
else
commit_tid = atomic_read(&ei->i_sync_tid);
/* if (log_start_commit(journal, commit_tid)) {
* The VFS has written the file data. If the inode is unaltered log_wait_commit(journal, commit_tid);
* then we need not start a commit.
*/
if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0, /* sys_fsync did this */
};
ret = sync_inode(inode, &wbc);
goto out; goto out;
} }
flush:
/* /*
* In case we didn't commit a transaction, we have to flush * In case we didn't commit a transaction, we have to flush
* disk caches manually so that data really is on persistent * disk caches manually so that data really is on persistent
......
...@@ -699,8 +699,9 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, ...@@ -699,8 +699,9 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
int err = 0; int err = 0;
struct ext3_block_alloc_info *block_i; struct ext3_block_alloc_info *block_i;
ext3_fsblk_t current_block; ext3_fsblk_t current_block;
struct ext3_inode_info *ei = EXT3_I(inode);
block_i = EXT3_I(inode)->i_block_alloc_info; block_i = ei->i_block_alloc_info;
/* /*
* If we're splicing into a [td]indirect block (as opposed to the * If we're splicing into a [td]indirect block (as opposed to the
* inode) then we need to get write access to the [td]indirect block * inode) then we need to get write access to the [td]indirect block
...@@ -741,6 +742,8 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, ...@@ -741,6 +742,8 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
inode->i_ctime = CURRENT_TIME_SEC; inode->i_ctime = CURRENT_TIME_SEC;
ext3_mark_inode_dirty(handle, inode); ext3_mark_inode_dirty(handle, inode);
/* ext3_mark_inode_dirty already updated i_sync_tid */
atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
/* had we spliced it onto indirect block? */ /* had we spliced it onto indirect block? */
if (where->bh) { if (where->bh) {
...@@ -1735,6 +1738,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, ...@@ -1735,6 +1738,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
ssize_t ret; ssize_t ret;
int orphan = 0; int orphan = 0;
size_t count = iov_length(iov, nr_segs); size_t count = iov_length(iov, nr_segs);
int retries = 0;
if (rw == WRITE) { if (rw == WRITE) {
loff_t final_size = offset + count; loff_t final_size = offset + count;
...@@ -1757,9 +1761,12 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, ...@@ -1757,9 +1761,12 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
} }
} }
retry:
ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, offset, nr_segs,
ext3_get_block, NULL); ext3_get_block, NULL);
if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
if (orphan) { if (orphan) {
int err; int err;
...@@ -2750,6 +2757,8 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) ...@@ -2750,6 +2757,8 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
struct ext3_inode_info *ei; struct ext3_inode_info *ei;
struct buffer_head *bh; struct buffer_head *bh;
struct inode *inode; struct inode *inode;
journal_t *journal = EXT3_SB(sb)->s_journal;
transaction_t *transaction;
long ret; long ret;
int block; int block;
...@@ -2827,6 +2836,30 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) ...@@ -2827,6 +2836,30 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
ei->i_data[block] = raw_inode->i_block[block]; ei->i_data[block] = raw_inode->i_block[block];
INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->i_orphan);
/*
* Set transaction id's of transactions that have to be committed
* to finish f[data]sync. We set them to currently running transaction
* as we cannot be sure that the inode or some of its metadata isn't
* part of the transaction - the inode could have been reclaimed and
* now it is reread from disk.
*/
if (journal) {
tid_t tid;
spin_lock(&journal->j_state_lock);
if (journal->j_running_transaction)
transaction = journal->j_running_transaction;
else
transaction = journal->j_committing_transaction;
if (transaction)
tid = transaction->t_tid;
else
tid = journal->j_commit_sequence;
spin_unlock(&journal->j_state_lock);
atomic_set(&ei->i_sync_tid, tid);
atomic_set(&ei->i_datasync_tid, tid);
}
if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 && if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 &&
EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) { EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
/* /*
...@@ -3011,6 +3044,7 @@ static int ext3_do_update_inode(handle_t *handle, ...@@ -3011,6 +3044,7 @@ static int ext3_do_update_inode(handle_t *handle,
err = rc; err = rc;
ei->i_state &= ~EXT3_STATE_NEW; ei->i_state &= ~EXT3_STATE_NEW;
atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
out_brelse: out_brelse:
brelse (bh); brelse (bh);
ext3_std_error(inode->i_sb, err); ext3_std_error(inode->i_sb, err);
......
...@@ -466,6 +466,8 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) ...@@ -466,6 +466,8 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
return NULL; return NULL;
ei->i_block_alloc_info = NULL; ei->i_block_alloc_info = NULL;
ei->vfs_inode.i_version = 1; ei->vfs_inode.i_version = 1;
atomic_set(&ei->i_datasync_tid, 0);
atomic_set(&ei->i_sync_tid, 0);
return &ei->vfs_inode; return &ei->vfs_inode;
} }
......
...@@ -756,6 +756,7 @@ journal_t * journal_init_dev(struct block_device *bdev, ...@@ -756,6 +756,7 @@ journal_t * journal_init_dev(struct block_device *bdev,
return journal; return journal;
out_err: out_err:
kfree(journal->j_wbuf);
kfree(journal); kfree(journal);
return NULL; return NULL;
} }
...@@ -820,6 +821,7 @@ journal_t * journal_init_inode (struct inode *inode) ...@@ -820,6 +821,7 @@ journal_t * journal_init_inode (struct inode *inode)
return journal; return journal;
out_err: out_err:
kfree(journal->j_wbuf);
kfree(journal); kfree(journal);
return NULL; return NULL;
} }
......
...@@ -913,6 +913,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, ...@@ -913,6 +913,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
return journal; return journal;
out_err: out_err:
kfree(journal->j_wbuf);
jbd2_stats_proc_exit(journal); jbd2_stats_proc_exit(journal);
kfree(journal); kfree(journal);
return NULL; return NULL;
...@@ -986,6 +987,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) ...@@ -986,6 +987,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
return journal; return journal;
out_err: out_err:
kfree(journal->j_wbuf);
jbd2_stats_proc_exit(journal); jbd2_stats_proc_exit(journal);
kfree(journal); kfree(journal);
return NULL; return NULL;
......
...@@ -137,6 +137,14 @@ struct ext3_inode_info { ...@@ -137,6 +137,14 @@ struct ext3_inode_info {
* by other means, so we have truncate_mutex. * by other means, so we have truncate_mutex.
*/ */
struct mutex truncate_mutex; struct mutex truncate_mutex;
/*
* Transactions that contain inode's metadata needed to complete
* fsync and fdatasync, respectively.
*/
atomic_t i_sync_tid;
atomic_t i_datasync_tid;
struct inode vfs_inode; struct inode vfs_inode;
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册