Commit 0e01df10 authored by Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Fix a number of bugs, most notably a potential stale data exposure
  after a crash and a potential BUG_ON crash if a file has the data
  journalling flag enabled while it has dirty delayed allocation blocks
  that haven't been written yet.  Also fix a potential crash in the new
  project quota code, and an oops triggered by a maliciously corrupted
  file system.

  In addition, fix some DAX-specific bugs, including when there is a
  transient ENOSPC situation and races between writes via direct I/O and
  an mmap'ed segment that could lead to lost I/O.

  Finally, the usual set of miscellaneous cleanups"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (23 commits)
  ext4: pre-zero allocated blocks for DAX IO
  ext4: refactor direct IO code
  ext4: fix race in transient ENOSPC detection
  ext4: handle transient ENOSPC properly for DAX
  dax: call get_blocks() with create == 1 for write faults to unwritten extents
  ext4: remove unmeetable inconsisteny check from ext4_find_extent()
  jbd2: remove excess descriptions for handle_s
  ext4: remove unnecessary bio get/put
  ext4: silence UBSAN in ext4_mb_init()
  ext4: address UBSAN warning in mb_find_order_for_block()
  ext4: fix oops on corrupted filesystem
  ext4: fix check of dqget() return value in ext4_ioctl_setproject()
  ext4: clean up error handling when orphan list is corrupted
  ext4: fix hang when processing corrupted orphaned inode list
  ext4: remove trailing \n from ext4_warning/ext4_error calls
  ext4: fix races between changing inode journal mode and ext4_writepages
  ext4: handle unwritten or delalloc buffers before enabling data journaling
  ext4: fix jbd2 handle extension in ext4_ext_truncate_extend_restart()
  ext4: do not ask jbd2 to write data for delalloc buffers
  jbd2: add support for avoiding data writes during transaction commits
  ...
@@ -936,6 +936,8 @@ static int compat_filldir(struct dir_context *ctx, const char *name, int namlen,
     }
     dirent = buf->previous;
     if (dirent) {
+        if (signal_pending(current))
+            return -EINTR;
         if (__put_user(offset, &dirent->d_off))
             goto efault;
     }
@@ -1020,6 +1022,8 @@ static int compat_filldir64(struct dir_context *ctx, const char *name,
     dirent = buf->previous;
     if (dirent) {
+        if (signal_pending(current))
+            return -EINTR;
         if (__put_user_unaligned(offset, &dirent->d_off))
             goto efault;
     }
...
@@ -676,7 +676,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
     if (error)
         goto unlock_page;

-    if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
+    if (!buffer_mapped(&bh) && !vmf->cow_page) {
         if (vmf->flags & FAULT_FLAG_WRITE) {
             error = get_block(inode, block, &bh, 1);
             count_vm_event(PGMAJFAULT);
...
@@ -610,7 +610,8 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
     jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);

-    return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
+    jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
+    return 1;
 }

 /*
...
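(Aside on the hunk above, not part of the commit text: the old code returned the value of jbd2_journal_force_commit_nested(), which is 0 when another commit was already in flight, so the allocator could give up with ENOSPC while that commit was still about to free blocks. The new code kicks the commit and always reports "retry". Below is a minimal userspace sketch of that retry contract; all names are illustrative, not the kernel API.)

```c
#include <stdio.h>

static int commit_in_progress = 1;  /* pretend another thread is committing */

/* Returns 1 only if *this* call ran a commit; 0 if one was already running. */
static int force_commit_nested(void)
{
    return commit_in_progress ? 0 : 1;
}

static int should_retry_alloc(int *retries)
{
    if (++(*retries) > 3)
        return 0;
    /* Old behaviour: "return force_commit_nested();" could report 0 and
     * make the caller fail with ENOSPC while space was being freed.
     * New behaviour: kick the commit, then always ask for a retry. */
    force_commit_nested();
    return 1;
}

int main(void)
{
    int retries = 0;

    while (should_retry_alloc(&retries))
        printf("retrying allocation, attempt %d\n", retries);
    return 0;
}
```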
@@ -150,6 +150,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
     while (ctx->pos < inode->i_size) {
         struct ext4_map_blocks map;

+        if (fatal_signal_pending(current)) {
+            err = -ERESTARTSYS;
+            goto errout;
+        }
+        cond_resched();
         map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
         map.m_len = 1;
         err = ext4_map_blocks(NULL, inode, &map, 0);
...
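(Aside: this hunk, together with the ext4_htree_fill_tree() and filldir hunks later in the series, makes iteration over huge or maliciously looping directories killable by polling for a pending signal and yielding the CPU. A rough userspace model of the pattern, with a SIGINT handler standing in for kernel signal delivery:)

```c
#include <errno.h>
#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t got_signal;

static void on_sigint(int sig) { (void)sig; got_signal = 1; }

static int process_entries(long nentries)
{
    for (long i = 0; i < nentries; i++) {
        if (got_signal)     /* kernel: fatal_signal_pending(current) */
            return -EINTR;  /* ext4_readdir() uses -ERESTARTSYS */
        /* the kernel loop also calls cond_resched() here */
    }
    return 0;
}

int main(void)
{
    signal(SIGINT, on_sigint);
    printf("result: %d\n", process_entries(1L << 30));
    return 0;
}
```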
@@ -33,6 +33,7 @@
 #include <linux/ratelimit.h>
 #include <crypto/hash.h>
 #include <linux/falloc.h>
+#include <linux/percpu-rwsem.h>
 #ifdef __KERNEL__
 #include <linux/compat.h>
 #endif
@@ -581,6 +582,9 @@ enum {
 #define EXT4_GET_BLOCKS_ZERO			0x0200
 #define EXT4_GET_BLOCKS_CREATE_ZERO		(EXT4_GET_BLOCKS_CREATE |\
 					EXT4_GET_BLOCKS_ZERO)
+    /* Caller will submit data before dropping transaction handle. This
+     * allows jbd2 to avoid submitting data before commit. */
+#define EXT4_GET_BLOCKS_IO_SUBMIT		0x0400

 /*
  * The bit position of these flags must not overlap with any of the
@@ -1505,6 +1509,9 @@ struct ext4_sb_info {
     struct ratelimit_state s_err_ratelimit_state;
     struct ratelimit_state s_warning_ratelimit_state;
     struct ratelimit_state s_msg_ratelimit_state;
+
+    /* Barrier between changing inodes' journal flags and writepages ops. */
+    struct percpu_rw_semaphore s_journal_flag_rwsem;
 };

 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1549,7 +1556,6 @@ enum {
     EXT4_STATE_DIOREAD_LOCK,	/* Disable support for dio read
                                    nolocking */
     EXT4_STATE_MAY_INLINE_DATA,	/* may have in-inode data */
-    EXT4_STATE_ORDERED_MODE,	/* data=ordered mode */
     EXT4_STATE_EXT_PRECACHED,	/* extents have been precached */
 };
@@ -2521,8 +2527,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
 int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
                              struct buffer_head *bh_result, int create);
-int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
-                            struct buffer_head *bh_result, int create);
+int ext4_dax_get_block(struct inode *inode, sector_t iblock,
+                       struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
                    struct buffer_head *bh_result, int create);
 int ext4_dio_get_block(struct inode *inode, sector_t iblock,
@@ -2581,7 +2587,6 @@ extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
                                struct ext4_map_blocks *map, int flags);
-extern ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
 extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
 extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
 extern void ext4_ind_truncate(handle_t *, struct inode *inode);
@@ -3329,6 +3334,13 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
     }
 }

+static inline bool ext4_aligned_io(struct inode *inode, loff_t off, loff_t len)
+{
+    int blksize = 1 << inode->i_blkbits;
+
+    return IS_ALIGNED(off, blksize) && IS_ALIGNED(len, blksize);
+}
+
 #endif	/* __KERNEL__ */

 #define EFSBADCRC	EBADMSG		/* Bad CRC detected */
...
@@ -359,10 +359,21 @@ static inline int ext4_journal_force_commit(journal_t *journal)
     return 0;
 }

-static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
+static inline int ext4_jbd2_inode_add_write(handle_t *handle,
+                                            struct inode *inode)
 {
     if (ext4_handle_valid(handle))
-        return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode);
+        return jbd2_journal_inode_add_write(handle,
+                                            EXT4_I(inode)->jinode);
+    return 0;
+}
+
+static inline int ext4_jbd2_inode_add_wait(handle_t *handle,
+                                           struct inode *inode)
+{
+    if (ext4_handle_valid(handle))
+        return jbd2_journal_inode_add_wait(handle,
+                                           EXT4_I(inode)->jinode);
     return 0;
 }
...
@@ -120,9 +120,14 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle,
     if (!ext4_handle_valid(handle))
         return 0;
-    if (handle->h_buffer_credits > needed)
+    if (handle->h_buffer_credits >= needed)
         return 0;
-    err = ext4_journal_extend(handle, needed);
+    /*
+     * If we need to extend the journal get a few extra blocks
+     * while we're at it for efficiency's sake.
+     */
+    needed += 3;
+    err = ext4_journal_extend(handle, needed - handle->h_buffer_credits);
     if (err <= 0)
         return err;
     err = ext4_truncate_restart_trans(handle, inode, needed);
@@ -907,13 +912,6 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,

         eh = ext_block_hdr(bh);
         ppos++;
-        if (unlikely(ppos > depth)) {
-            put_bh(bh);
-            EXT4_ERROR_INODE(inode,
-                             "ppos %d > depth %d", ppos, depth);
-            ret = -EFSCORRUPTED;
-            goto err;
-        }
         path[ppos].p_bh = bh;
         path[ppos].p_hdr = eh;
     }
@@ -2583,7 +2581,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
         }
     } else
         ext4_error(sbi->s_sb, "strange request: removal(2) "
-                   "%u-%u from %u:%u\n",
+                   "%u-%u from %u:%u",
                    from, to, le32_to_cpu(ex->ee_block), ee_len);
     return 0;
 }
@@ -3738,7 +3736,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
     if (ee_block != map->m_lblk || ee_len > map->m_len) {
 #ifdef EXT4_DEBUG
         ext4_warning("Inode (%ld) finished: extent logical block %llu,"
-                     " len %u; IO logical block %llu, len %u\n",
+                     " len %u; IO logical block %llu, len %u",
                      inode->i_ino, (unsigned long long)ee_block, ee_len,
                      (unsigned long long)map->m_lblk, map->m_len);
 #endif
...
@@ -707,7 +707,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
         (status & EXTENT_STATUS_WRITTEN)) {
         ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
                 " delayed and written which can potentially "
-                " cause data loss.\n", lblk, len);
+                " cause data loss.", lblk, len);
         WARN_ON(1);
     }
...
@@ -202,7 +202,7 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
     if (IS_ERR(handle))
         result = VM_FAULT_SIGBUS;
     else
-        result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
+        result = __dax_fault(vma, vmf, ext4_dax_get_block, NULL);

     if (write) {
         if (!IS_ERR(handle))
@@ -238,7 +238,7 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
         result = VM_FAULT_SIGBUS;
     else
         result = __dax_pmd_fault(vma, addr, pmd, flags,
-                ext4_dax_mmap_get_block, NULL);
+                ext4_dax_get_block, NULL);

     if (write) {
         if (!IS_ERR(handle))
@@ -373,7 +373,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
         if (ext4_encrypted_inode(d_inode(dir)) &&
             !ext4_is_child_context_consistent_with_parent(d_inode(dir), inode)) {
             ext4_warning(inode->i_sb,
-                         "Inconsistent encryption contexts: %lu/%lu\n",
+                         "Inconsistent encryption contexts: %lu/%lu",
                          (unsigned long) d_inode(dir)->i_ino,
                          (unsigned long) inode->i_ino);
             dput(dir);
...
@@ -1150,25 +1150,20 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
     unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
     ext4_group_t block_group;
     int bit;
-    struct buffer_head *bitmap_bh;
+    struct buffer_head *bitmap_bh = NULL;
     struct inode *inode = NULL;
-    long err = -EIO;
+    int err = -EFSCORRUPTED;

-    /* Error cases - e2fsck has already cleaned up for us */
-    if (ino > max_ino) {
-        ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino);
-        err = -EFSCORRUPTED;
-        goto error;
-    }
+    if (ino < EXT4_FIRST_INO(sb) || ino > max_ino)
+        goto bad_orphan;

     block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
     bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
     bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
     if (IS_ERR(bitmap_bh)) {
-        err = PTR_ERR(bitmap_bh);
-        ext4_warning(sb, "inode bitmap error %ld for orphan %lu",
-                     ino, err);
-        goto error;
+        ext4_error(sb, "inode bitmap error %ld for orphan %lu",
+                   ino, PTR_ERR(bitmap_bh));
+        return (struct inode *) bitmap_bh;
     }

     /* Having the inode bit set should be a 100% indicator that this
@@ -1179,15 +1174,21 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
         goto bad_orphan;

     inode = ext4_iget(sb, ino);
-    if (IS_ERR(inode))
-        goto iget_failed;
+    if (IS_ERR(inode)) {
+        err = PTR_ERR(inode);
+        ext4_error(sb, "couldn't read orphan inode %lu (err %d)",
+                   ino, err);
+        return inode;
+    }

     /*
-     * If the orphans has i_nlinks > 0 then it should be able to be
-     * truncated, otherwise it won't be removed from the orphan list
-     * during processing and an infinite loop will result.
+     * If the orphans has i_nlinks > 0 then it should be able to
+     * be truncated, otherwise it won't be removed from the orphan
+     * list during processing and an infinite loop will result.
+     * Similarly, it must not be a bad inode.
      */
-    if (inode->i_nlink && !ext4_can_truncate(inode))
+    if ((inode->i_nlink && !ext4_can_truncate(inode)) ||
+        is_bad_inode(inode))
         goto bad_orphan;

     if (NEXT_ORPHAN(inode) > max_ino)
@@ -1195,29 +1196,25 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
     brelse(bitmap_bh);
     return inode;

-iget_failed:
-    err = PTR_ERR(inode);
-    inode = NULL;
 bad_orphan:
-    ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
-    printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n",
-           bit, (unsigned long long)bitmap_bh->b_blocknr,
-           ext4_test_bit(bit, bitmap_bh->b_data));
-    printk(KERN_WARNING "inode=%p\n", inode);
+    ext4_error(sb, "bad orphan inode %lu", ino);
+    if (bitmap_bh)
+        printk(KERN_ERR "ext4_test_bit(bit=%d, block=%llu) = %d\n",
+               bit, (unsigned long long)bitmap_bh->b_blocknr,
+               ext4_test_bit(bit, bitmap_bh->b_data));
     if (inode) {
-        printk(KERN_WARNING "is_bad_inode(inode)=%d\n",
+        printk(KERN_ERR "is_bad_inode(inode)=%d\n",
                is_bad_inode(inode));
-        printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n",
+        printk(KERN_ERR "NEXT_ORPHAN(inode)=%u\n",
                NEXT_ORPHAN(inode));
-        printk(KERN_WARNING "max_ino=%lu\n", max_ino);
-        printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink);
+        printk(KERN_ERR "max_ino=%lu\n", max_ino);
+        printk(KERN_ERR "i_nlink=%u\n", inode->i_nlink);
         /* Avoid freeing blocks if we got a bad deleted inode */
         if (inode->i_nlink == 0)
             inode->i_blocks = 0;
         iput(inode);
     }
     brelse(bitmap_bh);
-error:
     return ERR_PTR(err);
 }
...
@@ -648,133 +648,6 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
     return err;
 }

-/*
- * O_DIRECT for ext3 (or indirect map) based files
- *
- * If the O_DIRECT write will extend the file then add this inode to the
- * orphan list. So recovery will truncate it back to the original size
- * if the machine crashes during the write.
- *
- * If the O_DIRECT write is intantiating holes inside i_size and the machine
- * crashes then stale disk data _may_ be exposed inside the file. But current
- * VFS code falls back into buffered path in that case so we are safe.
- */
-ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
-    struct file *file = iocb->ki_filp;
-    struct inode *inode = file->f_mapping->host;
-    struct ext4_inode_info *ei = EXT4_I(inode);
-    loff_t offset = iocb->ki_pos;
-    handle_t *handle;
-    ssize_t ret;
-    int orphan = 0;
-    size_t count = iov_iter_count(iter);
-    int retries = 0;
-
-    if (iov_iter_rw(iter) == WRITE) {
-        loff_t final_size = offset + count;
-
-        if (final_size > inode->i_size) {
-            /* Credits for sb + inode write */
-            handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
-            if (IS_ERR(handle)) {
-                ret = PTR_ERR(handle);
-                goto out;
-            }
-            ret = ext4_orphan_add(handle, inode);
-            if (ret) {
-                ext4_journal_stop(handle);
-                goto out;
-            }
-            orphan = 1;
-            ei->i_disksize = inode->i_size;
-            ext4_journal_stop(handle);
-        }
-    }
-
-retry:
-    if (iov_iter_rw(iter) == READ && ext4_should_dioread_nolock(inode)) {
-        /*
-         * Nolock dioread optimization may be dynamically disabled
-         * via ext4_inode_block_unlocked_dio(). Check inode's state
-         * while holding extra i_dio_count ref.
-         */
-        inode_dio_begin(inode);
-        smp_mb();
-        if (unlikely(ext4_test_inode_state(inode,
-                                           EXT4_STATE_DIOREAD_LOCK))) {
-            inode_dio_end(inode);
-            goto locked;
-        }
-        if (IS_DAX(inode))
-            ret = dax_do_io(iocb, inode, iter,
-                            ext4_dio_get_block, NULL, 0);
-        else
-            ret = __blockdev_direct_IO(iocb, inode,
-                                       inode->i_sb->s_bdev, iter,
-                                       ext4_dio_get_block,
-                                       NULL, NULL, 0);
-        inode_dio_end(inode);
-    } else {
-locked:
-        if (IS_DAX(inode))
-            ret = dax_do_io(iocb, inode, iter,
-                            ext4_dio_get_block, NULL, DIO_LOCKING);
-        else
-            ret = blockdev_direct_IO(iocb, inode, iter,
-                                     ext4_dio_get_block);
-
-        if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
-            loff_t isize = i_size_read(inode);
-            loff_t end = offset + count;
-
-            if (end > isize)
-                ext4_truncate_failed_write(inode);
-        }
-    }
-    if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
-        goto retry;
-
-    if (orphan) {
-        int err;
-
-        /* Credits for sb + inode write */
-        handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
-        if (IS_ERR(handle)) {
-            /* This is really bad luck. We've written the data
-             * but cannot extend i_size. Bail out and pretend
-             * the write failed... */
-            ret = PTR_ERR(handle);
-            if (inode->i_nlink)
-                ext4_orphan_del(NULL, inode);
-            goto out;
-        }
-        if (inode->i_nlink)
-            ext4_orphan_del(handle, inode);
-        if (ret > 0) {
-            loff_t end = offset + ret;
-            if (end > inode->i_size) {
-                ei->i_disksize = end;
-                i_size_write(inode, end);
-                /*
-                 * We're going to return a positive `ret'
-                 * here due to non-zero-length I/O, so there's
-                 * no way of reporting error returns from
-                 * ext4_mark_inode_dirty() to userspace. So
-                 * ignore it.
-                 */
-                ext4_mark_inode_dirty(handle, inode);
-            }
-        }
-        err = ext4_journal_stop(handle);
-        if (ret == 0)
-            ret = err;
-    }
-out:
-    return ret;
-}
-
 /*
  * Calculate the number of metadata blocks need to reserve
  * to allocate a new block at @lblocks for non extent file based file
...
@@ -1780,7 +1780,7 @@ int empty_inline_dir(struct inode *dir, int *has_inline_data)
             ext4_warning(dir->i_sb,
                          "bad inline directory (dir #%lu) - "
                          "inode %u, rec_len %u, name_len %d"
-                         "inline size %d\n",
+                         "inline size %d",
                          dir->i_ino, le32_to_cpu(de->inode),
                          le16_to_cpu(de->rec_len), de->name_len,
                          inline_size);
...
@@ -684,6 +684,24 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         ret = check_block_validity(inode, map);
         if (ret != 0)
             return ret;
+
+        /*
+         * Inodes with freshly allocated blocks where contents will be
+         * visible after transaction commit must be on transaction's
+         * ordered data list.
+         */
+        if (map->m_flags & EXT4_MAP_NEW &&
+            !(map->m_flags & EXT4_MAP_UNWRITTEN) &&
+            !(flags & EXT4_GET_BLOCKS_ZERO) &&
+            !IS_NOQUOTA(inode) &&
+            ext4_should_order_data(inode)) {
+            if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
+                ret = ext4_jbd2_inode_add_wait(handle, inode);
+            else
+                ret = ext4_jbd2_inode_add_write(handle, inode);
+            if (ret)
+                return ret;
+        }
     }
     return retval;
 }
@@ -1289,15 +1307,6 @@ static int ext4_write_end(struct file *file,
     int i_size_changed = 0;

     trace_ext4_write_end(inode, pos, len, copied);
-    if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) {
-        ret = ext4_jbd2_file_inode(handle, inode);
-        if (ret) {
-            unlock_page(page);
-            put_page(page);
-            goto errout;
-        }
-    }
-
     if (ext4_has_inline_data(inode)) {
         ret = ext4_write_inline_data_end(inode, pos, len,
                                          copied, page);
@@ -2313,7 +2322,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
      * the data was copied into the page cache.
      */
     get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
-                       EXT4_GET_BLOCKS_METADATA_NOFAIL;
+                       EXT4_GET_BLOCKS_METADATA_NOFAIL |
+                       EXT4_GET_BLOCKS_IO_SUBMIT;
     dioread_nolock = ext4_should_dioread_nolock(inode);
     if (dioread_nolock)
         get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
@@ -2602,11 +2612,14 @@ static int ext4_writepages(struct address_space *mapping,
     struct blk_plug plug;
     bool give_up_on_write = false;

+    percpu_down_read(&sbi->s_journal_flag_rwsem);
     trace_ext4_writepages(inode, wbc);

-    if (dax_mapping(mapping))
-        return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
-                                           wbc);
+    if (dax_mapping(mapping)) {
+        ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
+                                          wbc);
+        goto out_writepages;
+    }

     /*
      * No pages to write? This is mainly a kludge to avoid starting
@@ -2776,6 +2789,7 @@ static int ext4_writepages(struct address_space *mapping,
 out_writepages:
     trace_ext4_writepages_result(inode, wbc, ret,
                                  nr_to_write - wbc->nr_to_write);
+    percpu_up_read(&sbi->s_journal_flag_rwsem);
     return ret;
 }
@@ -3215,75 +3229,52 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 }

 #ifdef CONFIG_FS_DAX
-int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
-                            struct buffer_head *bh_result, int create)
+/*
+ * Get block function for DAX IO and mmap faults. It takes care of converting
+ * unwritten extents to written ones and initializes new / converted blocks
+ * to zeros.
+ */
+int ext4_dax_get_block(struct inode *inode, sector_t iblock,
+                       struct buffer_head *bh_result, int create)
 {
-    int ret, err;
-    int credits;
-    struct ext4_map_blocks map;
-    handle_t *handle = NULL;
-    int flags = 0;
+    int ret;

-    ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n",
-               inode->i_ino, create);
-    map.m_lblk = iblock;
-    map.m_len = bh_result->b_size >> inode->i_blkbits;
-    credits = ext4_chunk_trans_blocks(inode, map.m_len);
-    if (create) {
-        flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO;
-        handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
-        if (IS_ERR(handle)) {
-            ret = PTR_ERR(handle);
-            return ret;
-        }
-    }
+    ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create);
+    if (!create)
+        return _ext4_get_block(inode, iblock, bh_result, 0);

-    ret = ext4_map_blocks(handle, inode, &map, flags);
-    if (create) {
-        err = ext4_journal_stop(handle);
-        if (ret >= 0 && err < 0)
-            ret = err;
-    }
-    if (ret <= 0)
-        goto out;
-    if (map.m_flags & EXT4_MAP_UNWRITTEN) {
-        int err2;
+    ret = ext4_get_block_trans(inode, iblock, bh_result,
+                               EXT4_GET_BLOCKS_PRE_IO |
+                               EXT4_GET_BLOCKS_CREATE_ZERO);
+    if (ret < 0)
+        return ret;

-        /*
-         * We are protected by i_mmap_sem so we know block cannot go
-         * away from under us even though we dropped i_data_sem.
-         * Convert extent to written and write zeros there.
-         *
-         * Note: We may get here even when create == 0.
-         */
-        handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
-        if (IS_ERR(handle)) {
-            ret = PTR_ERR(handle);
-            goto out;
-        }
-
-        err = ext4_map_blocks(handle, inode, &map,
-              EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO);
-        if (err < 0)
-            ret = err;
-        err2 = ext4_journal_stop(handle);
-        if (err2 < 0 && ret > 0)
-            ret = err2;
-    }
-out:
-    WARN_ON_ONCE(ret == 0 && create);
-    if (ret > 0) {
-        map_bh(bh_result, inode->i_sb, map.m_pblk);
+    if (buffer_unwritten(bh_result)) {
         /*
-         * At least for now we have to clear BH_New so that DAX code
-         * doesn't attempt to zero blocks again in a racy way.
+         * We are protected by i_mmap_sem or i_mutex so we know block
+         * cannot go away from under us even though we dropped
+         * i_data_sem. Convert extent to written and write zeros there.
          */
-        map.m_flags &= ~EXT4_MAP_NEW;
-        ext4_update_bh_state(bh_result, map.m_flags);
-        bh_result->b_size = map.m_len << inode->i_blkbits;
-        ret = 0;
+        ret = ext4_get_block_trans(inode, iblock, bh_result,
+                                   EXT4_GET_BLOCKS_CONVERT |
+                                   EXT4_GET_BLOCKS_CREATE_ZERO);
+        if (ret < 0)
+            return ret;
     }
-    return ret;
+    /*
+     * At least for now we have to clear BH_New so that DAX code
+     * doesn't attempt to zero blocks again in a racy way.
+     */
+    clear_buffer_new(bh_result);
+    return 0;
+}
+#else
+/* Just define empty function, it will never get called. */
+int ext4_dax_get_block(struct inode *inode, sector_t iblock,
+                       struct buffer_head *bh_result, int create)
+{
+    BUG();
+    return 0;
 }
 #endif
@@ -3316,7 +3307,9 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 }

 /*
- * For ext4 extent files, ext4 will do direct-io write to holes,
+ * Handling of direct IO writes.
+ *
+ * For ext4 extent files, ext4 will do direct-io write even to holes,
  * preallocated extents, and those write extend the file, no need to
  * fall back to buffered IO.
  *
@@ -3334,10 +3327,11 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
  * if the machine crashes during the write.
  *
  */
-static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
 {
     struct file *file = iocb->ki_filp;
     struct inode *inode = file->f_mapping->host;
+    struct ext4_inode_info *ei = EXT4_I(inode);
     ssize_t ret;
     loff_t offset = iocb->ki_pos;
     size_t count = iov_iter_count(iter);
@@ -3345,10 +3339,25 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
     get_block_t *get_block_func = NULL;
     int dio_flags = 0;
     loff_t final_size = offset + count;
+    int orphan = 0;
+    handle_t *handle;

-    /* Use the old path for reads and writes beyond i_size. */
-    if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size)
-        return ext4_ind_direct_IO(iocb, iter);
+    if (final_size > inode->i_size) {
+        /* Credits for sb + inode write */
+        handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+        if (IS_ERR(handle)) {
+            ret = PTR_ERR(handle);
+            goto out;
+        }
+        ret = ext4_orphan_add(handle, inode);
+        if (ret) {
+            ext4_journal_stop(handle);
+            goto out;
+        }
+        orphan = 1;
+        ei->i_disksize = inode->i_size;
+        ext4_journal_stop(handle);
+    }

     BUG_ON(iocb->private == NULL);
@@ -3357,8 +3366,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
      * conversion. This also disallows race between truncate() and
      * overwrite DIO as i_dio_count needs to be incremented under i_mutex.
      */
-    if (iov_iter_rw(iter) == WRITE)
-        inode_dio_begin(inode);
+    inode_dio_begin(inode);

     /* If we do a overwrite dio, i_mutex locking can be released */
     overwrite = *((int *)iocb->private);
@@ -3367,7 +3375,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
         inode_unlock(inode);

     /*
-     * We could direct write to holes and fallocate.
+     * For extent mapped files we could direct write to holes and fallocate.
      *
      * Allocated blocks to fill the hole are marked as unwritten to prevent
      * parallel buffered read to expose the stale data before DIO complete
@@ -3389,7 +3397,23 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
     iocb->private = NULL;
     if (overwrite)
         get_block_func = ext4_dio_get_block_overwrite;
-    else if (is_sync_kiocb(iocb)) {
+    else if (IS_DAX(inode)) {
+        /*
+         * We can avoid zeroing for aligned DAX writes beyond EOF. Other
+         * writes need zeroing either because they can race with page
+         * faults or because they use partial blocks.
+         */
+        if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size &&
+            ext4_aligned_io(inode, offset, count))
+            get_block_func = ext4_dio_get_block;
+        else
+            get_block_func = ext4_dax_get_block;
+        dio_flags = DIO_LOCKING;
+    } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
+               round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
+        get_block_func = ext4_dio_get_block;
+        dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
+    } else if (is_sync_kiocb(iocb)) {
         get_block_func = ext4_dio_get_block_unwritten_sync;
         dio_flags = DIO_LOCKING;
     } else {
@@ -3399,10 +3423,10 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
     BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
 #endif
-    if (IS_DAX(inode))
+    if (IS_DAX(inode)) {
         ret = dax_do_io(iocb, inode, iter, get_block_func,
                         ext4_end_io_dio, dio_flags);
-    else
+    } else
         ret = __blockdev_direct_IO(iocb, inode,
                                    inode->i_sb->s_bdev, iter,
                                    get_block_func,
@@ -3422,12 +3446,86 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
         ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
     }

-    if (iov_iter_rw(iter) == WRITE)
-        inode_dio_end(inode);
+    inode_dio_end(inode);
     /* take i_mutex locking again if we do a ovewrite dio */
     if (overwrite)
         inode_lock(inode);

+    if (ret < 0 && final_size > inode->i_size)
+        ext4_truncate_failed_write(inode);
+
+    /* Handle extending of i_size after direct IO write */
+    if (orphan) {
+        int err;
+
+        /* Credits for sb + inode write */
+        handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+        if (IS_ERR(handle)) {
+            /* This is really bad luck. We've written the data
+             * but cannot extend i_size. Bail out and pretend
+             * the write failed... */
+            ret = PTR_ERR(handle);
+            if (inode->i_nlink)
+                ext4_orphan_del(NULL, inode);
+            goto out;
+        }
+        if (inode->i_nlink)
+            ext4_orphan_del(handle, inode);
+        if (ret > 0) {
+            loff_t end = offset + ret;
+            if (end > inode->i_size) {
+                ei->i_disksize = end;
+                i_size_write(inode, end);
+                /*
+                 * We're going to return a positive `ret'
+                 * here due to non-zero-length I/O, so there's
+                 * no way of reporting error returns from
+                 * ext4_mark_inode_dirty() to userspace. So
+                 * ignore it.
+                 */
+                ext4_mark_inode_dirty(handle, inode);
+            }
+        }
+        err = ext4_journal_stop(handle);
+        if (ret == 0)
+            ret = err;
+    }
+out:
+    return ret;
+}
+
+static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
+{
+    int unlocked = 0;
+    struct inode *inode = iocb->ki_filp->f_mapping->host;
+    ssize_t ret;
+
+    if (ext4_should_dioread_nolock(inode)) {
+        /*
+         * Nolock dioread optimization may be dynamically disabled
+         * via ext4_inode_block_unlocked_dio(). Check inode's state
+         * while holding extra i_dio_count ref.
+         */
+        inode_dio_begin(inode);
+        smp_mb();
+        if (unlikely(ext4_test_inode_state(inode,
+                                           EXT4_STATE_DIOREAD_LOCK)))
+            inode_dio_end(inode);
+        else
+            unlocked = 1;
+    }
+    if (IS_DAX(inode)) {
+        ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block,
+                        NULL, unlocked ? 0 : DIO_LOCKING);
+    } else {
+        ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+                                   iter, ext4_dio_get_block,
+                                   NULL, NULL,
+                                   unlocked ? 0 : DIO_LOCKING);
+    }
+    if (unlocked)
+        inode_dio_end(inode);
+
     return ret;
 }
@@ -3455,10 +3553,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
         return 0;

     trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
-    if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-        ret = ext4_ext_direct_IO(iocb, iter);
+    if (iov_iter_rw(iter) == READ)
+        ret = ext4_direct_IO_read(iocb, iter);
     else
-        ret = ext4_ind_direct_IO(iocb, iter);
+        ret = ext4_direct_IO_write(iocb, iter);
     trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
     return ret;
 }
@@ -3534,10 +3632,7 @@ void ext4_set_aops(struct inode *inode)
 {
     switch (ext4_inode_journal_mode(inode)) {
     case EXT4_INODE_ORDERED_DATA_MODE:
-        ext4_set_inode_state(inode, EXT4_STATE_ORDERED_MODE);
-        break;
     case EXT4_INODE_WRITEBACK_DATA_MODE:
-        ext4_clear_inode_state(inode, EXT4_STATE_ORDERED_MODE);
         break;
     case EXT4_INODE_JOURNAL_DATA_MODE:
         inode->i_mapping->a_ops = &ext4_journalled_aops;
@@ -3630,8 +3725,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
     } else {
         err = 0;
         mark_buffer_dirty(bh);
-        if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE))
-            err = ext4_jbd2_file_inode(handle, inode);
+        if (ext4_should_order_data(inode))
+            err = ext4_jbd2_inode_add_write(handle, inode);
     }

 unlock:
@@ -5429,6 +5524,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
     journal_t *journal;
     handle_t *handle;
     int err;
+    struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

     /*
      * We have to be very careful here: changing a data block's
@@ -5445,22 +5541,30 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
         return 0;
     if (is_journal_aborted(journal))
         return -EROFS;
-    /* We have to allocate physical blocks for delalloc blocks
-     * before flushing journal. otherwise delalloc blocks can not
-     * be allocated any more. even more truncate on delalloc blocks
-     * could trigger BUG by flushing delalloc blocks in journal.
-     * There is no delalloc block in non-journal data mode.
-     */
-    if (val && test_opt(inode->i_sb, DELALLOC)) {
-        err = ext4_alloc_da_blocks(inode);
-        if (err < 0)
-            return err;
-    }

     /* Wait for all existing dio workers */
     ext4_inode_block_unlocked_dio(inode);
     inode_dio_wait(inode);

+    /*
+     * Before flushing the journal and switching inode's aops, we have
+     * to flush all dirty data the inode has. There can be outstanding
+     * delayed allocations, there can be unwritten extents created by
+     * fallocate or buffered writes in dioread_nolock mode covered by
+     * dirty data which can be converted only after flushing the dirty
+     * data (and journalled aops don't know how to handle these cases).
+     */
+    if (val) {
+        down_write(&EXT4_I(inode)->i_mmap_sem);
+        err = filemap_write_and_wait(inode->i_mapping);
+        if (err < 0) {
+            up_write(&EXT4_I(inode)->i_mmap_sem);
+            ext4_inode_resume_unlocked_dio(inode);
+            return err;
+        }
+    }
+
+    percpu_down_write(&sbi->s_journal_flag_rwsem);
     jbd2_journal_lock_updates(journal);

     /*
@@ -5477,6 +5581,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
         err = jbd2_journal_flush(journal);
         if (err < 0) {
             jbd2_journal_unlock_updates(journal);
+            percpu_up_write(&sbi->s_journal_flag_rwsem);
             ext4_inode_resume_unlocked_dio(inode);
             return err;
         }
@@ -5485,6 +5590,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
     ext4_set_aops(inode);

     jbd2_journal_unlock_updates(journal);
+    percpu_up_write(&sbi->s_journal_flag_rwsem);
+
+    if (val)
+        up_write(&EXT4_I(inode)->i_mmap_sem);
     ext4_inode_resume_unlocked_dio(inode);

     /* Finally we can mark the inode as dirty. */
...
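(Aside on the ext4_writepages() and ext4_change_inode_journal_flag() hunks above: each writeback pass now holds s_journal_flag_rwsem shared while the journal-mode switch holds it exclusive, after first flushing dirty data under i_mmap_sem, so writepages can no longer observe the data-journalling flag flipping mid-pass, the BUG_ON scenario named in the pull message. A small userspace analogy follows, assuming a plain pthread rwlock in place of the kernel's percpu_rw_semaphore; names are illustrative only.)

```c
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t journal_flag_lock = PTHREAD_RWLOCK_INITIALIZER;
static int data_journalling;                /* the "inode journal mode" */

static void *writepages(void *arg)
{
    (void)arg;
    pthread_rwlock_rdlock(&journal_flag_lock);
    int mode = data_journalling;            /* mode is now stable ...   */
    printf("writeback runs with mode=%d\n", mode);
    /* ... for the whole pass; no mid-pass flag flip is observable */
    pthread_rwlock_unlock(&journal_flag_lock);
    return NULL;
}

static void change_journal_flag(int val)
{
    /* kernel first flushes dirty data (filemap_write_and_wait), then: */
    pthread_rwlock_wrlock(&journal_flag_lock); /* waits out writepages */
    data_journalling = val;
    pthread_rwlock_unlock(&journal_flag_lock);
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, writepages, NULL);
    change_journal_flag(1);
    pthread_join(t, NULL);
    return 0;
}
```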
@@ -365,7 +365,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
         struct dquot *transfer_to[MAXQUOTAS] = { };

         transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
-        if (transfer_to[PRJQUOTA]) {
+        if (!IS_ERR(transfer_to[PRJQUOTA])) {
             err = __dquot_transfer(inode, transfer_to);
             dqput(transfer_to[PRJQUOTA]);
             if (err)
...
@@ -1266,6 +1266,7 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
 static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
 {
     int order = 1;
+    int bb_incr = 1 << (e4b->bd_blkbits - 1);
     void *bb;

     BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
@@ -1278,7 +1279,8 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
             /* this block is part of buddy of order 'order' */
             return order;
         }
-        bb += 1 << (e4b->bd_blkbits - order);
+        bb += bb_incr;
+        bb_incr >>= 1;
         order++;
     }
     return 0;
@@ -2583,7 +2585,7 @@ int ext4_mb_init(struct super_block *sb)
 {
     struct ext4_sb_info *sbi = EXT4_SB(sb);
     unsigned i, j;
-    unsigned offset;
+    unsigned offset, offset_incr;
     unsigned max;
     int ret;
@@ -2612,11 +2614,13 @@ int ext4_mb_init(struct super_block *sb)
     i = 1;
     offset = 0;
+    offset_incr = 1 << (sb->s_blocksize_bits - 1);
     max = sb->s_blocksize << 2;
     do {
         sbi->s_mb_offsets[i] = offset;
         sbi->s_mb_maxs[i] = max;
-        offset += 1 << (sb->s_blocksize_bits - i);
+        offset += offset_incr;
+        offset_incr = offset_incr >> 1;
         max = max >> 1;
         i++;
     } while (i <= sb->s_blocksize_bits + 1);
@@ -4935,7 +4939,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
      * boundary.
      */
     if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
-        ext4_warning(sb, "too much blocks added to group %u\n",
+        ext4_warning(sb, "too much blocks added to group %u",
                      block_group);
         err = -EINVAL;
         goto error_return;
...
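(Aside: both mballoc hunks above cure UBSAN warnings about `1 << (bits - i)`, which becomes a negative shift, undefined behaviour, once `i` exceeds `bits`; the fix starts an increment at `1 << (bits - 1)` and halves it each pass. A quick userspace check that the two walks agree wherever the old shift is still defined:)

```c
#include <assert.h>
#include <stdio.h>

int main(void)
{
    int bits = 12;                  /* e.g. a 4k block size */
    unsigned off_shift = 0, off_incr = 0;
    unsigned incr = 1u << (bits - 1);

    for (int i = 1; i <= bits; i++) {
        off_shift += 1u << (bits - i);  /* old formula, safe for i <= bits */
        off_incr += incr;               /* new formula, safe everywhere */
        incr >>= 1;
        assert(off_shift == off_incr);
    }
    /* the kernel loop runs one step further, where the old shift
     * would be by -1 and therefore undefined */
    printf("offsets agree up to i=%d\n", bits);
    return 0;
}
```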
@@ -121,7 +121,7 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
     __ext4_warning(sb, function, line, "%s", msg);
     __ext4_warning(sb, function, line,
                    "MMP failure info: last update time: %llu, last update "
-                   "node: %s, last update device: %s\n",
+                   "node: %s, last update device: %s",
                    (long long unsigned int) le64_to_cpu(mmp->mmp_time),
                    mmp->mmp_nodename, mmp->mmp_bdevname);
 }
@@ -353,7 +353,7 @@ int ext4_multi_mount_protect(struct super_block *sb,
          * wait for MMP interval and check mmp_seq.
          */
         if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
-            ext4_warning(sb, "MMP startup interrupted, failing mount\n");
+            ext4_warning(sb, "MMP startup interrupted, failing mount");
             goto failed;
         }
...
@@ -400,7 +400,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,

     /* Even in case of data=writeback it is reasonable to pin
      * inode to transaction, to prevent unexpected data loss */
-    *err = ext4_jbd2_file_inode(handle, orig_inode);
+    *err = ext4_jbd2_inode_add_write(handle, orig_inode);

 unlock_pages:
     unlock_page(pagep[0]);
...
@@ -1107,6 +1107,11 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
     }

     while (1) {
+        if (fatal_signal_pending(current)) {
+            err = -ERESTARTSYS;
+            goto errout;
+        }
+        cond_resched();
         block = dx_get_block(frame->at);
         ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
                                      start_hash, start_minor_hash);
@@ -1613,7 +1618,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
         if (nokey)
             return ERR_PTR(-ENOKEY);
         ext4_warning(inode->i_sb,
                      "Inconsistent encryption contexts: %lu/%lu",
-                     "Inconsistent encryption contexts: %lu/%lu\n",
                      (unsigned long) dir->i_ino,
                      (unsigned long) inode->i_ino);
         return ERR_PTR(-EPERM);
@@ -2828,7 +2833,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
          * list entries can cause panics at unmount time.
          */
         mutex_lock(&sbi->s_orphan_lock);
-        list_del(&EXT4_I(inode)->i_orphan);
+        list_del_init(&EXT4_I(inode)->i_orphan);
         mutex_unlock(&sbi->s_orphan_lock);
     }
 }
...
@@ -342,9 +342,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
     if (bio) {
         int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ?
                     WRITE_SYNC : WRITE;
-        bio_get(io->io_bio);
         submit_bio(io_op, io->io_bio);
-        bio_put(io->io_bio);
     }
     io->io_bio = NULL;
 }
...
@@ -41,7 +41,7 @@ int ext4_resize_begin(struct super_block *sb)
      */
     if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
         ext4_warning(sb, "There are errors in the filesystem, "
-                     "so online resizing is not allowed\n");
+                     "so online resizing is not allowed");
         return -EPERM;
     }
...
@@ -859,6 +859,7 @@ static void ext4_put_super(struct super_block *sb)
     percpu_counter_destroy(&sbi->s_freeinodes_counter);
     percpu_counter_destroy(&sbi->s_dirs_counter);
     percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+    percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
     brelse(sbi->s_sbh);
 #ifdef CONFIG_QUOTA
     for (i = 0; i < EXT4_MAXQUOTAS; i++)
@@ -3930,6 +3931,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
     if (!err)
         err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
                                   GFP_KERNEL);
+    if (!err)
+        err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem);
+
     if (err) {
         ext4_msg(sb, KERN_ERR, "insufficient memory");
         goto failed_mount6;
...
@@ -219,6 +219,8 @@ static int journal_submit_data_buffers(journal_t *journal,
     spin_lock(&journal->j_list_lock);
     list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+        if (!(jinode->i_flags & JI_WRITE_DATA))
+            continue;
         mapping = jinode->i_vfs_inode->i_mapping;
         jinode->i_flags |= JI_COMMIT_RUNNING;
         spin_unlock(&journal->j_list_lock);
@@ -256,6 +258,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
     /* For locking, see the comment in journal_submit_data_buffers() */
     spin_lock(&journal->j_list_lock);
     list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+        if (!(jinode->i_flags & JI_WAIT_DATA))
+            continue;
         jinode->i_flags |= JI_COMMIT_RUNNING;
         spin_unlock(&journal->j_list_lock);
         err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
...
@@ -94,7 +94,8 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
 EXPORT_SYMBOL(jbd2_journal_invalidatepage);
 EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
 EXPORT_SYMBOL(jbd2_journal_force_commit);
-EXPORT_SYMBOL(jbd2_journal_file_inode);
+EXPORT_SYMBOL(jbd2_journal_inode_add_write);
+EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
 EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
 EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
 EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
...
@@ -2462,7 +2462,8 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
 /*
  * File inode in the inode list of the handle's transaction
  */
-int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
+static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
+                                   unsigned long flags)
 {
     transaction_t *transaction = handle->h_transaction;
     journal_t *journal;
@@ -2487,12 +2488,14 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
      * and if jinode->i_next_transaction == transaction, commit code
      * will only file the inode where we want it.
      */
-    if (jinode->i_transaction == transaction ||
-        jinode->i_next_transaction == transaction)
+    if ((jinode->i_transaction == transaction ||
+         jinode->i_next_transaction == transaction) &&
+        (jinode->i_flags & flags) == flags)
         return 0;

     spin_lock(&journal->j_list_lock);
+    jinode->i_flags |= flags;
+    /* Is inode already attached where we need it? */
     if (jinode->i_transaction == transaction ||
         jinode->i_next_transaction == transaction)
         goto done;
@@ -2523,6 +2526,17 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
     return 0;
 }

+int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
+{
+    return jbd2_journal_file_inode(handle, jinode,
+                                   JI_WRITE_DATA | JI_WAIT_DATA);
+}
+
+int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
+{
+    return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA);
+}
+
 /*
  * File truncate and transaction commit interact with each other in a
  * non-trivial way. If a transaction writing data block A is
...
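(Aside on the jbd2 hunks above: the single "file this inode on the committing transaction" operation is split so that commit submits data only for inodes flagged JI_WRITE_DATA and waits only on inodes flagged JI_WAIT_DATA; ext4 files delalloc-backed inodes wait-only because it now submits that data itself before releasing the handle. A toy userspace model of the flag gating; the structures are illustrative, not the kernel's.)

```c
#include <stdio.h>

#define JI_WRITE_DATA (1 << 1)
#define JI_WAIT_DATA  (1 << 2)

struct jinode { const char *name; unsigned long flags; };

static void commit_transaction(struct jinode *list, int n)
{
    for (int i = 0; i < n; i++)     /* cf. journal_submit_data_buffers() */
        if (list[i].flags & JI_WRITE_DATA)
            printf("submit data for %s\n", list[i].name);
    for (int i = 0; i < n; i++)     /* cf. journal_finish_inode_data_buffers() */
        if (list[i].flags & JI_WAIT_DATA)
            printf("wait on data for %s\n", list[i].name);
}

int main(void)
{
    struct jinode list[] = {
        { "ordered-write", JI_WRITE_DATA | JI_WAIT_DATA },  /* add_write */
        { "delalloc",      JI_WAIT_DATA },                  /* add_wait */
    };

    commit_transaction(list, 2);
    return 0;
}
```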
@@ -619,7 +619,7 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,

 static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
 {
-    return jbd2_journal_file_inode(handle, &OCFS2_I(inode)->ip_jinode);
+    return jbd2_journal_inode_add_write(handle, &OCFS2_I(inode)->ip_jinode);
 }

 static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
...
@@ -182,6 +182,8 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
     }
     dirent = buf->previous;
     if (dirent) {
+        if (signal_pending(current))
+            return -EINTR;
         if (__put_user(offset, &dirent->d_off))
             goto efault;
     }
@@ -261,6 +263,8 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
         return -EINVAL;
     dirent = buf->previous;
     if (dirent) {
+        if (signal_pending(current))
+            return -EINTR;
         if (__put_user(offset, &dirent->d_off))
             goto efault;
     }
...
@@ -403,11 +403,19 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)

 /* Flags in jbd_inode->i_flags */
 #define __JI_COMMIT_RUNNING 0
-/* Commit of the inode data in progress. We use this flag to protect us from
+#define __JI_WRITE_DATA 1
+#define __JI_WAIT_DATA 2
+
+/*
+ * Commit of the inode data in progress. We use this flag to protect us from
  * concurrent deletion of inode. We cannot use reference to inode for this
  * since we cannot afford doing last iput() on behalf of kjournald
  */
 #define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING)
+/* Write allocated dirty buffers in this inode before commit */
+#define JI_WRITE_DATA (1 << __JI_WRITE_DATA)
+/* Wait for outstanding data writes for this inode before commit */
+#define JI_WAIT_DATA (1 << __JI_WAIT_DATA)

 /**
  * struct jbd_inode is the structure linking inodes in ordered mode
@@ -781,9 +789,6 @@ jbd2_time_diff(unsigned long start, unsigned long end)
  * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the
  *	number that will fit in j_blocksize
  * @j_last_sync_writer: most recent pid which did a synchronous write
- * @j_history: Buffer storing the transactions statistics history
- * @j_history_max: Maximum number of transactions in the statistics history
- * @j_history_cur: Current number of transactions in the statistics history
  * @j_history_lock: Protect the transactions statistics history
  * @j_proc_entry: procfs entry for the jbd statistics directory
  * @j_stats: Overall statistics
@@ -1270,7 +1275,8 @@ extern int jbd2_journal_clear_err (journal_t *);
 extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
 extern int jbd2_journal_force_commit(journal_t *);
 extern int jbd2_journal_force_commit_nested(journal_t *);
-extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
+extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode);
+extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode);
 extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
                 struct jbd2_inode *inode, loff_t new_size);
 extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
...
@@ -37,6 +37,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
     free_percpu(brw->fast_read_ctr);
     brw->fast_read_ctr = NULL; /* catch use after free bugs */
 }
+EXPORT_SYMBOL_GPL(percpu_free_rwsem);

 /*
  * This is the fast-path for down_read/up_read. If it succeeds we rely
...