提交 f6b1495f 编写于 作者: L Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "All cleanups and bug fixes; most notably, fix some problems discovered
  in ext4's NFS support, and fix an ioctl (EXT4_IOC_GROUP_ADD) used by
  old versions of e2fsprogs which we accidentally broke a while back.

  Also fixed some error paths in ext4's quota and inline data support.

  Finally, improve tail latency in jbd2's commit code"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: check for shutdown and r/o file system in ext4_write_inode()
  ext4: force inode writes when nfsd calls commit_metadata()
  ext4: avoid declaring fs inconsistent due to invalid file handles
  ext4: include terminating u32 in size of xattr entries when expanding inodes
  ext4: compare old and new mode before setting update_mode flag
  ext4: fix EXT4_IOC_GROUP_ADD ioctl
  ext4: hard fail dax mount on unsupported devices
  jbd2: update locking documentation for transaction_t
  ext4: remove redundant condition check
  jbd2: clean up indentation issue, replace spaces with tab
  ext4: clean up indentation issues, remove extraneous tabs
  ext4: missing unlock/put_page() in ext4_try_to_write_inline_data()
  ext4: fix possible use after free in ext4_quota_enable
  jbd2: avoid long hold times of j_state_lock while committing a transaction
  ext4: add ext4_sb_bread() to disambiguate ENOMEM cases
......@@ -248,7 +248,8 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
error = posix_acl_update_mode(inode, &mode, &acl);
if (error)
goto out_stop;
update_mode = 1;
if (mode != inode->i_mode)
update_mode = 1;
}
error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */);
......
......@@ -2454,8 +2454,19 @@ int do_journal_get_write_access(handle_t *handle,
#define FALL_BACK_TO_NONDELALLOC 1
#define CONVERT_INLINE_DATA 2
extern struct inode *ext4_iget(struct super_block *, unsigned long);
extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
typedef enum {
EXT4_IGET_NORMAL = 0,
EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */
EXT4_IGET_HANDLE = 0x0002 /* Inode # is from a handle */
} ext4_iget_flags;
extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ext4_iget_flags flags, const char *function,
unsigned int line);
#define ext4_iget(sb, ino, flags) \
__ext4_iget((sb), (ino), (flags), __func__, __LINE__)
extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct dentry *, struct iattr *);
extern int ext4_getattr(const struct path *, struct kstat *, u32, unsigned int);
......@@ -2538,6 +2549,8 @@ extern int ext4_group_extend(struct super_block *sb,
extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
/* super.c */
extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
sector_t block, int op_flags);
extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
extern int ext4_calculate_overhead(struct super_block *sb);
extern void ext4_superblock_csum_set(struct super_block *sb);
......
......@@ -1225,7 +1225,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
if (!ext4_test_bit(bit, bitmap_bh->b_data))
goto bad_orphan;
inode = ext4_iget(sb, ino);
inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
ext4_error(sb, "couldn't read orphan inode %lu (err %d)",
......
......@@ -705,8 +705,11 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
if (!PageUptodate(page)) {
ret = ext4_read_inline_page(inode, page);
if (ret < 0)
if (ret < 0) {
unlock_page(page);
put_page(page);
goto out_up_read;
}
}
ret = 1;
......
......@@ -4817,7 +4817,9 @@ static inline u64 ext4_inode_peek_iversion(const struct inode *inode)
return inode_peek_iversion(inode);
}
struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ext4_iget_flags flags, const char *function,
unsigned int line)
{
struct ext4_iloc iloc;
struct ext4_inode *raw_inode;
......@@ -4831,6 +4833,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
gid_t i_gid;
projid_t i_projid;
if (((flags & EXT4_IGET_NORMAL) &&
(ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) ||
(ino < EXT4_ROOT_INO) ||
(ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
if (flags & EXT4_IGET_HANDLE)
return ERR_PTR(-ESTALE);
__ext4_error(sb, function, line,
"inode #%lu: comm %s: iget: illegal inode #",
ino, current->comm);
return ERR_PTR(-EFSCORRUPTED);
}
inode = iget_locked(sb, ino);
if (!inode)
return ERR_PTR(-ENOMEM);
......@@ -4846,18 +4860,26 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
raw_inode = ext4_raw_inode(&iloc);
if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
EXT4_ERROR_INODE(inode, "root inode unallocated");
ext4_error_inode(inode, function, line, 0,
"iget: root inode unallocated");
ret = -EFSCORRUPTED;
goto bad_inode;
}
if ((flags & EXT4_IGET_HANDLE) &&
(raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
ret = -ESTALE;
goto bad_inode;
}
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
EXT4_INODE_SIZE(inode->i_sb) ||
(ei->i_extra_isize & 3)) {
EXT4_ERROR_INODE(inode,
"bad extra_isize %u (inode size %u)",
ext4_error_inode(inode, function, line, 0,
"iget: bad extra_isize %u "
"(inode size %u)",
ei->i_extra_isize,
EXT4_INODE_SIZE(inode->i_sb));
ret = -EFSCORRUPTED;
......@@ -4879,7 +4901,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
}
if (!ext4_inode_csum_verify(inode, raw_inode, ei)) {
EXT4_ERROR_INODE(inode, "checksum invalid");
ext4_error_inode(inode, function, line, 0,
"iget: checksum invalid");
ret = -EFSBADCRC;
goto bad_inode;
}
......@@ -4936,7 +4959,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
inode->i_size = ext4_isize(sb, raw_inode);
if ((size = i_size_read(inode)) < 0) {
EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
ext4_error_inode(inode, function, line, 0,
"iget: bad i_size value: %lld", size);
ret = -EFSCORRUPTED;
goto bad_inode;
}
......@@ -5012,7 +5036,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ret = 0;
if (ei->i_file_acl &&
!ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
ext4_error_inode(inode, function, line, 0,
"iget: bad extended attribute block %llu",
ei->i_file_acl);
ret = -EFSCORRUPTED;
goto bad_inode;
......@@ -5040,8 +5065,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
} else if (S_ISLNK(inode->i_mode)) {
/* VFS does not allow setting these so must be corruption */
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
EXT4_ERROR_INODE(inode,
"immutable or append flags not allowed on symlinks");
ext4_error_inode(inode, function, line, 0,
"iget: immutable or append flags "
"not allowed on symlinks");
ret = -EFSCORRUPTED;
goto bad_inode;
}
......@@ -5071,7 +5097,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
make_bad_inode(inode);
} else {
ret = -EFSCORRUPTED;
EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
ext4_error_inode(inode, function, line, 0,
"iget: bogus i_mode (%o)", inode->i_mode);
goto bad_inode;
}
brelse(iloc.bh);
......@@ -5085,13 +5112,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
return ERR_PTR(ret);
}
struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
{
if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
return ERR_PTR(-EFSCORRUPTED);
return ext4_iget(sb, ino);
}
static int ext4_inode_blocks_set(handle_t *handle,
struct ext4_inode *raw_inode,
struct ext4_inode_info *ei)
......@@ -5380,9 +5400,13 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int err;
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) ||
sb_rdonly(inode->i_sb))
return 0;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
if (EXT4_SB(inode->i_sb)->s_journal) {
if (ext4_journal_current_handle()) {
jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
......@@ -5398,7 +5422,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
return 0;
err = ext4_force_commit(inode->i_sb);
err = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
EXT4_I(inode)->i_sync_tid);
} else {
struct ext4_iloc iloc;
......
......@@ -125,7 +125,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN))
return -EPERM;
inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO);
inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(inode_bl))
return PTR_ERR(inode_bl);
ei_bl = EXT4_I(inode_bl);
......
......@@ -116,9 +116,9 @@ static int update_ind_extent_range(handle_t *handle, struct inode *inode,
int i, retval = 0;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
bh = sb_bread(inode->i_sb, pblock);
if (!bh)
return -EIO;
bh = ext4_sb_bread(inode->i_sb, pblock, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
i_data = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
......@@ -145,9 +145,9 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
int i, retval = 0;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
bh = sb_bread(inode->i_sb, pblock);
if (!bh)
return -EIO;
bh = ext4_sb_bread(inode->i_sb, pblock, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
i_data = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
......@@ -175,9 +175,9 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
int i, retval = 0;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
bh = sb_bread(inode->i_sb, pblock);
if (!bh)
return -EIO;
bh = ext4_sb_bread(inode->i_sb, pblock, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
i_data = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
......@@ -224,9 +224,9 @@ static int free_dind_blocks(handle_t *handle,
struct buffer_head *bh;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
if (!bh)
return -EIO;
bh = ext4_sb_bread(inode->i_sb, le32_to_cpu(i_data), 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
tmp_idata = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
......@@ -254,9 +254,9 @@ static int free_tind_blocks(handle_t *handle,
struct buffer_head *bh;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
if (!bh)
return -EIO;
bh = ext4_sb_bread(inode->i_sb, le32_to_cpu(i_data), 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
tmp_idata = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
......@@ -382,9 +382,9 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
struct ext4_extent_header *eh;
block = ext4_idx_pblock(ix);
bh = sb_bread(inode->i_sb, block);
if (!bh)
return -EIO;
bh = ext4_sb_bread(inode->i_sb, block, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
eh = (struct ext4_extent_header *)bh->b_data;
if (eh->eh_depth != 0) {
......@@ -535,22 +535,22 @@ int ext4_ext_migrate(struct inode *inode)
if (i_data[EXT4_IND_BLOCK]) {
retval = update_ind_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
if (retval)
goto err_out;
if (retval)
goto err_out;
} else
lb.curr_block += max_entries;
if (i_data[EXT4_DIND_BLOCK]) {
retval = update_dind_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
if (retval)
goto err_out;
if (retval)
goto err_out;
} else
lb.curr_block += max_entries * max_entries;
if (i_data[EXT4_TIND_BLOCK]) {
retval = update_tind_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
if (retval)
goto err_out;
if (retval)
goto err_out;
}
/*
* Build the last extent
......
......@@ -1571,7 +1571,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
dentry);
return ERR_PTR(-EFSCORRUPTED);
}
inode = ext4_iget_normal(dir->i_sb, ino);
inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL);
if (inode == ERR_PTR(-ESTALE)) {
EXT4_ERROR_INODE(dir,
"deleted inode referenced: %u",
......@@ -1613,7 +1613,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
return ERR_PTR(-EFSCORRUPTED);
}
return d_obtain_alias(ext4_iget_normal(child->d_sb, ino));
return d_obtain_alias(ext4_iget(child->d_sb, ino, EXT4_IGET_NORMAL));
}
/*
......
......@@ -127,10 +127,12 @@ static int verify_group_input(struct super_block *sb,
else if (free_blocks_count < 0)
ext4_warning(sb, "Bad blocks count %u",
input->blocks_count);
else if (!(bh = sb_bread(sb, end - 1)))
else if (IS_ERR(bh = ext4_sb_bread(sb, end - 1, 0))) {
err = PTR_ERR(bh);
bh = NULL;
ext4_warning(sb, "Cannot read last block (%llu)",
end - 1);
else if (outside(input->block_bitmap, start, end))
} else if (outside(input->block_bitmap, start, end))
ext4_warning(sb, "Block bitmap not in group (block %llu)",
(unsigned long long)input->block_bitmap);
else if (outside(input->inode_bitmap, start, end))
......@@ -781,11 +783,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
struct buffer_head **o_group_desc, **n_group_desc;
struct buffer_head *dind;
struct buffer_head *gdb_bh;
struct buffer_head **o_group_desc, **n_group_desc = NULL;
struct buffer_head *dind = NULL;
struct buffer_head *gdb_bh = NULL;
int gdbackups;
struct ext4_iloc iloc;
struct ext4_iloc iloc = { .bh = NULL };
__le32 *data;
int err;
......@@ -794,21 +796,22 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
"EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
gdb_num);
gdb_bh = sb_bread(sb, gdblock);
if (!gdb_bh)
return -EIO;
gdb_bh = ext4_sb_bread(sb, gdblock, 0);
if (IS_ERR(gdb_bh))
return PTR_ERR(gdb_bh);
gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
if (gdbackups < 0) {
err = gdbackups;
goto exit_bh;
goto errout;
}
data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
dind = sb_bread(sb, le32_to_cpu(*data));
if (!dind) {
err = -EIO;
goto exit_bh;
dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0);
if (IS_ERR(dind)) {
err = PTR_ERR(dind);
dind = NULL;
goto errout;
}
data = (__le32 *)dind->b_data;
......@@ -816,18 +819,18 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
ext4_warning(sb, "new group %u GDT block %llu not reserved",
group, gdblock);
err = -EINVAL;
goto exit_dind;
goto errout;
}
BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
if (unlikely(err))
goto exit_dind;
goto errout;
BUFFER_TRACE(gdb_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, gdb_bh);
if (unlikely(err))
goto exit_dind;
goto errout;
BUFFER_TRACE(dind, "get_write_access");
err = ext4_journal_get_write_access(handle, dind);
......@@ -837,7 +840,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
/* ext4_reserve_inode_write() gets a reference on the iloc */
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (unlikely(err))
goto exit_dind;
goto errout;
n_group_desc = ext4_kvmalloc((gdb_num + 1) *
sizeof(struct buffer_head *),
......@@ -846,7 +849,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
err = -ENOMEM;
ext4_warning(sb, "not enough memory for %lu groups",
gdb_num + 1);
goto exit_inode;
goto errout;
}
/*
......@@ -862,7 +865,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
err = ext4_handle_dirty_metadata(handle, NULL, dind);
if (unlikely(err)) {
ext4_std_error(sb, err);
goto exit_inode;
goto errout;
}
inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >>
(9 - EXT4_SB(sb)->s_cluster_bits);
......@@ -871,8 +874,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
if (unlikely(err)) {
ext4_std_error(sb, err);
iloc.bh = NULL;
goto exit_inode;
goto errout;
}
brelse(dind);
......@@ -888,15 +890,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
err = ext4_handle_dirty_super(handle, sb);
if (err)
ext4_std_error(sb, err);
return err;
exit_inode:
errout:
kvfree(n_group_desc);
brelse(iloc.bh);
exit_dind:
brelse(dind);
exit_bh:
brelse(gdb_bh);
ext4_debug("leaving with error %d\n", err);
......@@ -916,9 +914,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
gdblock = ext4_meta_bg_first_block_no(sb, group) +
ext4_bg_has_super(sb, group);
gdb_bh = sb_bread(sb, gdblock);
if (!gdb_bh)
return -EIO;
gdb_bh = ext4_sb_bread(sb, gdblock, 0);
if (IS_ERR(gdb_bh))
return PTR_ERR(gdb_bh);
n_group_desc = ext4_kvmalloc((gdb_num + 1) *
sizeof(struct buffer_head *),
GFP_NOFS);
......@@ -975,9 +973,10 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
return -ENOMEM;
data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
dind = sb_bread(sb, le32_to_cpu(*data));
if (!dind) {
err = -EIO;
dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0);
if (IS_ERR(dind)) {
err = PTR_ERR(dind);
dind = NULL;
goto exit_free;
}
......@@ -996,9 +995,10 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
err = -EINVAL;
goto exit_bh;
}
primary[res] = sb_bread(sb, blk);
if (!primary[res]) {
err = -EIO;
primary[res] = ext4_sb_bread(sb, blk, 0);
if (IS_ERR(primary[res])) {
err = PTR_ERR(primary[res]);
primary[res] = NULL;
goto exit_bh;
}
gdbackups = verify_reserved_gdb(sb, group, primary[res]);
......@@ -1631,13 +1631,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
}
if (reserved_gdb || gdb_off == 0) {
if (ext4_has_feature_resize_inode(sb) ||
if (!ext4_has_feature_resize_inode(sb) ||
!le16_to_cpu(es->s_reserved_gdt_blocks)) {
ext4_warning(sb,
"No reserved GDT blocks, can't resize");
return -EPERM;
}
inode = ext4_iget(sb, EXT4_RESIZE_INO);
inode = ext4_iget(sb, EXT4_RESIZE_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(inode);
......@@ -1965,7 +1965,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
}
if (!resize_inode)
resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
resize_inode = ext4_iget(sb, EXT4_RESIZE_INO,
EXT4_IGET_SPECIAL);
if (IS_ERR(resize_inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(resize_inode);
......
......@@ -140,6 +140,29 @@ MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
/*
* This works like sb_bread() except it uses ERR_PTR for error
* returns. Currently with sb_bread it's impossible to distinguish
* between ENOMEM and EIO situations (since both result in a NULL
* return.
*/
struct buffer_head *
ext4_sb_bread(struct super_block *sb, sector_t block, int op_flags)
{
struct buffer_head *bh = sb_getblk(sb, block);
if (bh == NULL)
return ERR_PTR(-ENOMEM);
if (buffer_uptodate(bh))
return bh;
ll_rw_block(REQ_OP_READ, REQ_META | op_flags, 1, &bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
put_bh(bh);
return ERR_PTR(-EIO);
}
static int ext4_verify_csum_type(struct super_block *sb,
struct ext4_super_block *es)
{
......@@ -1000,14 +1023,13 @@ static void ext4_put_super(struct super_block *sb)
invalidate_bdev(sbi->journal_bdev);
ext4_blkdev_remove(sbi);
}
if (sbi->s_ea_inode_cache) {
ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
sbi->s_ea_inode_cache = NULL;
}
if (sbi->s_ea_block_cache) {
ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
sbi->s_ea_block_cache = NULL;
}
ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
sbi->s_ea_inode_cache = NULL;
ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
sbi->s_ea_block_cache = NULL;
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
brelse(sbi->s_sbh);
......@@ -1151,20 +1173,11 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
{
struct inode *inode;
if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
return ERR_PTR(-ESTALE);
if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
return ERR_PTR(-ESTALE);
/* iget isn't really right if the inode is currently unallocated!!
*
* ext4_read_inode will return a bad_inode if the inode had been
* deleted, so we should be safe.
*
/*
* Currently we don't know the generation for parent directory, so
* a generation of 0 means "accept any"
*/
inode = ext4_iget_normal(sb, ino);
inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (generation && inode->i_generation != generation) {
......@@ -1189,6 +1202,16 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
ext4_nfs_get_inode);
}
static int ext4_nfs_commit_metadata(struct inode *inode)
{
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL
};
trace_ext4_nfs_commit_metadata(inode);
return ext4_write_inode(inode, &wbc);
}
/*
* Try to release metadata pages (indirect blocks, directories) which are
* mapped via the block device. Since these pages could have journal heads
......@@ -1393,6 +1416,7 @@ static const struct export_operations ext4_export_ops = {
.fh_to_dentry = ext4_fh_to_dentry,
.fh_to_parent = ext4_fh_to_parent,
.get_parent = ext4_get_parent,
.commit_metadata = ext4_nfs_commit_metadata,
};
enum {
......@@ -1939,7 +1963,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
#ifdef CONFIG_FS_DAX
ext4_msg(sb, KERN_WARNING,
"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
sbi->s_mount_opt |= m->mount_opt;
sbi->s_mount_opt |= m->mount_opt;
#else
ext4_msg(sb, KERN_INFO, "dax option not supported");
return -1;
......@@ -3842,12 +3866,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
goto failed_mount;
}
if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
ext4_msg(sb, KERN_ERR,
"DAX unsupported by block device. Turning off DAX.");
sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
"DAX unsupported by block device.");
goto failed_mount;
}
}
......@@ -4328,7 +4352,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* so we can safely mount the rest of the filesystem now.
*/
root = ext4_iget(sb, EXT4_ROOT_INO);
root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(root)) {
ext4_msg(sb, KERN_ERR, "get root inode failed");
ret = PTR_ERR(root);
......@@ -4522,14 +4546,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
if (sbi->s_ea_inode_cache) {
ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
sbi->s_ea_inode_cache = NULL;
}
if (sbi->s_ea_block_cache) {
ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
sbi->s_ea_block_cache = NULL;
}
ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
sbi->s_ea_inode_cache = NULL;
ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
sbi->s_ea_block_cache = NULL;
if (sbi->s_journal) {
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
......@@ -4598,7 +4620,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
* happen if we iget() an unused inode, as the subsequent iput()
* will try to delete it.
*/
journal_inode = ext4_iget(sb, journal_inum);
journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
if (IS_ERR(journal_inode)) {
ext4_msg(sb, KERN_ERR, "no journal found");
return NULL;
......@@ -5680,7 +5702,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
if (!qf_inums[type])
return -EPERM;
qf_inode = ext4_iget(sb, qf_inums[type]);
qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
if (IS_ERR(qf_inode)) {
ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
return PTR_ERR(qf_inode);
......@@ -5690,9 +5712,9 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
qf_inode->i_flags |= S_NOQUOTA;
lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
err = dquot_enable(qf_inode, type, format_id, flags);
iput(qf_inode);
if (err)
lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
iput(qf_inode);
return err;
}
......
......@@ -384,7 +384,7 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
struct inode *inode;
int err;
inode = ext4_iget(parent->i_sb, ea_ino);
inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
ext4_error(parent->i_sb,
......@@ -522,14 +522,13 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
error = -ENODATA;
if (!EXT4_I(inode)->i_file_acl)
goto cleanup;
return -ENODATA;
ea_idebug(inode, "reading block %llu",
(unsigned long long)EXT4_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
if (!bh)
goto cleanup;
bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
if (IS_ERR(bh))
return PTR_ERR(bh);
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
error = ext4_xattr_check_block(inode, bh);
......@@ -696,26 +695,23 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
error = 0;
if (!EXT4_I(inode)->i_file_acl)
goto cleanup;
return 0;
ea_idebug(inode, "reading block %llu",
(unsigned long long)EXT4_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
error = -EIO;
if (!bh)
goto cleanup;
bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
if (IS_ERR(bh))
return PTR_ERR(bh);
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
error = ext4_xattr_check_block(inode, bh);
if (error)
goto cleanup;
ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh);
error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer,
buffer_size);
cleanup:
brelse(bh);
return error;
}
......@@ -830,9 +826,9 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
}
if (EXT4_I(inode)->i_file_acl) {
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
if (!bh) {
ret = -EIO;
bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
if (IS_ERR(bh)) {
ret = PTR_ERR(bh);
goto out;
}
......@@ -1486,7 +1482,8 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
}
while (ce) {
ea_inode = ext4_iget(inode->i_sb, ce->e_value);
ea_inode = ext4_iget(inode->i_sb, ce->e_value,
EXT4_IGET_NORMAL);
if (!IS_ERR(ea_inode) &&
!is_bad_inode(ea_inode) &&
(EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) &&
......@@ -1821,16 +1818,15 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
if (EXT4_I(inode)->i_file_acl) {
/* The inode already has an extended attribute block. */
bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl);
error = -EIO;
if (!bs->bh)
goto cleanup;
bs->bh = ext4_sb_bread(sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
if (IS_ERR(bs->bh))
return PTR_ERR(bs->bh);
ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
atomic_read(&(bs->bh->b_count)),
le32_to_cpu(BHDR(bs->bh)->h_refcount));
error = ext4_xattr_check_block(inode, bs->bh);
if (error)
goto cleanup;
return error;
/* Find the named attribute. */
bs->s.base = BHDR(bs->bh);
bs->s.first = BFIRST(bs->bh);
......@@ -1839,13 +1835,10 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
error = xattr_find_entry(inode, &bs->s.here, bs->s.end,
i->name_index, i->name, 1);
if (error && error != -ENODATA)
goto cleanup;
return error;
bs->s.not_found = error;
}
error = 0;
cleanup:
return error;
return 0;
}
static int
......@@ -2274,9 +2267,9 @@ static struct buffer_head *ext4_xattr_get_block(struct inode *inode)
if (!EXT4_I(inode)->i_file_acl)
return NULL;
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
if (!bh)
return ERR_PTR(-EIO);
bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
if (IS_ERR(bh))
return bh;
error = ext4_xattr_check_block(inode, bh);
if (error) {
brelse(bh);
......@@ -2729,7 +2722,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
base = IFIRST(header);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
min_offs = end - base;
total_ino = sizeof(struct ext4_xattr_ibody_header);
total_ino = sizeof(struct ext4_xattr_ibody_header) + sizeof(u32);
error = xattr_check_inode(inode, header, end);
if (error)
......@@ -2746,10 +2739,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
if (EXT4_I(inode)->i_file_acl) {
struct buffer_head *bh;
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
error = -EIO;
if (!bh)
bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
if (IS_ERR(bh)) {
error = PTR_ERR(bh);
goto cleanup;
}
error = ext4_xattr_check_block(inode, bh);
if (error) {
brelse(bh);
......@@ -2903,11 +2897,12 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
}
if (EXT4_I(inode)->i_file_acl) {
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
if (!bh) {
EXT4_ERROR_INODE(inode, "block %llu read error",
EXT4_I(inode)->i_file_acl);
error = -EIO;
bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
if (IS_ERR(bh)) {
error = PTR_ERR(bh);
if (error == -EIO)
EXT4_ERROR_INODE(inode, "block %llu read error",
EXT4_I(inode)->i_file_acl);
goto cleanup;
}
error = ext4_xattr_check_block(inode, bh);
......@@ -3060,8 +3055,10 @@ ext4_xattr_block_cache_find(struct inode *inode,
while (ce) {
struct buffer_head *bh;
bh = sb_bread(inode->i_sb, ce->e_value);
if (!bh) {
bh = ext4_sb_bread(inode->i_sb, ce->e_value, REQ_PRIO);
if (IS_ERR(bh)) {
if (PTR_ERR(bh) == -ENOMEM)
return NULL;
EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long)ce->e_value);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
......
......@@ -439,6 +439,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
finish_wait(&journal->j_wait_updates, &wait);
}
spin_unlock(&commit_transaction->t_handle_lock);
commit_transaction->t_state = T_SWITCH;
write_unlock(&journal->j_state_lock);
J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
journal->j_max_transaction_buffers);
......@@ -505,6 +507,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
atomic_sub(atomic_read(&journal->j_reserved_credits),
&commit_transaction->t_outstanding_credits);
write_lock(&journal->j_state_lock);
trace_jbd2_commit_flushing(journal, commit_transaction);
stats.run.rs_flushing = jiffies;
stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
......
......@@ -138,9 +138,9 @@ static inline void update_t_max_wait(transaction_t *transaction,
}
/*
* Wait until running transaction passes T_LOCKED state. Also starts the commit
* if needed. The function expects running transaction to exist and releases
* j_state_lock.
* Wait until running transaction passes to T_FLUSH state and new transaction
* can thus be started. Also starts the commit if needed. The function expects
* running transaction to exist and releases j_state_lock.
*/
static void wait_transaction_locked(journal_t *journal)
__releases(journal->j_state_lock)
......@@ -160,6 +160,32 @@ static void wait_transaction_locked(journal_t *journal)
finish_wait(&journal->j_wait_transaction_locked, &wait);
}
/*
* Wait until running transaction transitions from T_SWITCH to T_FLUSH
* state and new transaction can thus be started. The function releases
* j_state_lock.
*/
static void wait_transaction_switching(journal_t *journal)
__releases(journal->j_state_lock)
{
DEFINE_WAIT(wait);
if (WARN_ON(!journal->j_running_transaction ||
journal->j_running_transaction->t_state != T_SWITCH))
return;
prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
read_unlock(&journal->j_state_lock);
/*
* We don't call jbd2_might_wait_for_commit() here as there's no
* waiting for outstanding handles happening anymore in T_SWITCH state
* and handling of reserved handles actually relies on that for
* correctness.
*/
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
}
static void sub_reserved_credits(journal_t *journal, int blocks)
{
atomic_sub(blocks, &journal->j_reserved_credits);
......@@ -183,7 +209,8 @@ static int add_transaction_credits(journal_t *journal, int blocks,
* If the current transaction is locked down for commit, wait
* for the lock to be released.
*/
if (t->t_state == T_LOCKED) {
if (t->t_state != T_RUNNING) {
WARN_ON_ONCE(t->t_state >= T_FLUSH);
wait_transaction_locked(journal);
return 1;
}
......@@ -360,8 +387,14 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
/*
* We have handle reserved so we are allowed to join T_LOCKED
* transaction and we don't have to check for transaction size
* and journal space.
* and journal space. But we still have to wait while running
* transaction is being switched to a committing one as it
* won't wait for any handles anymore.
*/
if (transaction->t_state == T_SWITCH) {
wait_transaction_switching(journal);
goto repeat;
}
sub_reserved_credits(journal, blocks);
handle->h_reserved = 0;
}
......@@ -910,7 +943,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
* this is the first time this transaction is touching this buffer,
* reset the modified flag
*/
jh->b_modified = 0;
jh->b_modified = 0;
/*
* If the buffer is not journaled right now, we need to make sure it
......
......@@ -575,6 +575,7 @@ struct transaction_s
enum {
T_RUNNING,
T_LOCKED,
T_SWITCH,
T_FLUSH,
T_COMMIT,
T_COMMIT_DFLUSH,
......@@ -662,13 +663,13 @@ struct transaction_s
/*
* Number of outstanding updates running on this transaction
* [t_handle_lock]
* [none]
*/
atomic_t t_updates;
/*
* Number of buffers reserved for use by all handles in this transaction
* handle but not yet modified. [t_handle_lock]
* handle but not yet modified. [none]
*/
atomic_t t_outstanding_credits;
......@@ -690,7 +691,7 @@ struct transaction_s
ktime_t t_start_time;
/*
* How many handles used this transaction? [t_handle_lock]
* How many handles used this transaction? [none]
*/
atomic_t t_handle_count;
......
......@@ -226,6 +226,26 @@ TRACE_EVENT(ext4_drop_inode,
(unsigned long) __entry->ino, __entry->drop)
);
TRACE_EVENT(ext4_nfs_commit_metadata,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
),
TP_printk("dev %d,%d ino %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino)
);
TRACE_EVENT(ext4_mark_inode_dirty,
TP_PROTO(struct inode *inode, unsigned long IP),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册