Commit e4ce30f3 authored by Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits)
  ext4: Make fsync sync new parent directories in no-journal mode
  ext4: Drop whitespace at end of lines
  ext4: Fix compat EXT4_IOC_ADD_GROUP
  ext4: Conditionally define compat ioctl numbers
  tracing: Convert more ext4 events to DEFINE_EVENT
  ext4: Add new tracepoints to track mballoc's buddy bitmap loads
  ext4: Add a missing trace hook
  ext4: restart ext4_ext_remove_space() after transaction restart
  ext4: Clear the EXT4_EOFBLOCKS_FL flag only when warranted
  ext4: Avoid crashing on NULL ptr dereference on a filesystem error
  ext4: Use bitops to read/modify i_flags in struct ext4_inode_info
  ext4: Convert calls of ext4_error() to EXT4_ERROR_INODE()
  ext4: Convert callers of ext4_get_blocks() to use ext4_map_blocks()
  ext4: Add new abstraction ext4_map_blocks() underneath ext4_get_blocks()
  ext4: Use our own write_cache_pages()
  ext4: Show journal_checksum option
  ext4: Fix for ext4_mb_collect_stats()
  ext4: check for a good block group before loading buddy pages
  ext4: Prevent creation of files larger than RLIMIT_FSIZE using fallocate
  ext4: Remove extraneous newlines in ext4_msg() calls
  ...

Fixed up trivial conflict in fs/ext4/fsync.c
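The centerpiece of this series is the new ext4_map_blocks() interface, which replaces the old buffer_head-based ext4_get_blocks() calling convention throughout the extent code. A minimal sketch of the new convention, modeled on the ext4_readdir() conversion in the fs/ext4/dir.c hunks below (error handling elided; filp, inode and sb as in that function):

        struct ext4_map_blocks map;
        struct buffer_head *bh = NULL;

        map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); /* in: logical block */
        map.m_len = 1;                                        /* in: blocks wanted */
        err = ext4_map_blocks(NULL, inode, &map, 0);          /* NULL handle: lookup only */
        if (err > 0)                          /* > 0 means "this many blocks mapped" */
                bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);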
@@ -591,14 +591,15 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 	ret = ext4_mb_new_blocks(handle, &ar, errp);
 	if (count)
 		*count = ar.len;
 	/*
-	 * Account for the allocated meta blocks
+	 * Account for the allocated meta blocks.  We will never
+	 * fail EDQUOT for metdata, but we do account for it.
 	 */
 	if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
 		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 		EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
 		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+		dquot_alloc_block_nofail(inode, ar.len);
 	}
 	return ret;
 }
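The new dquot_alloc_block_nofail() call is the point of the "We will never fail EDQUOT for metdata" comment: unlike dquot_alloc_block(), which can refuse the allocation with -EDQUOT, the nofail variant only charges blocks that have already been allocated. A hedged sketch of the two patterns, assuming the quotaops.h API of this kernel generation (nr is a placeholder):

        /* Data blocks: the allocation may be refused over quota. */
        err = dquot_alloc_block(inode, nr);
        if (err)
                return err;             /* typically -EDQUOT */

        /* Metadata, as in the hunk above: the blocks already exist,
         * so only account for them; this call cannot fail. */
        dquot_alloc_block_nofail(inode, nr);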
...
@@ -72,9 +72,9 @@ static int add_system_zone(struct ext4_sb_info *sbi,
 	else if (start_blk >= (entry->start_blk + entry->count))
 		n = &(*n)->rb_right;
 	else {
-		if (start_blk + count > (entry->start_blk + 
-				entry->count))
-			entry->count = (start_blk + count - 
-				entry->start_blk);
+		if (start_blk + count > (entry->start_blk +
+				entry->count))
+			entry->count = (start_blk + count -
+				entry->start_blk);
 		new_node = *n;
 		new_entry = rb_entry(new_node, struct ext4_system_zone,
...
@@ -83,11 +83,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
 		error_msg = "inode out of bounds";
 	if (error_msg != NULL)
-		__ext4_error(dir->i_sb, function,
-			"bad entry in directory #%lu: %s - block=%llu"
+		ext4_error_inode(function, dir,
+			"bad entry in directory: %s - block=%llu"
 			"offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
-			dir->i_ino, error_msg,
-			(unsigned long long) bh->b_blocknr,
+			error_msg, (unsigned long long) bh->b_blocknr,
 			(unsigned) (offset%bh->b_size), offset,
 			le32_to_cpu(de->inode),
 			rlen, de->name_len);
@@ -111,7 +110,7 @@ static int ext4_readdir(struct file *filp,
 	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
 				    EXT4_FEATURE_COMPAT_DIR_INDEX) &&
-	    ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) ||
+	    ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
 	     ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
 		err = ext4_dx_readdir(filp, dirent, filldir);
 		if (err != ERR_BAD_DX_DIR) {
@@ -122,20 +121,20 @@ static int ext4_readdir(struct file *filp,
 		 * We don't set the inode dirty flag since it's not
 		 * critical that it get flushed back to the disk.
 		 */
-		EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
+		ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX);
 	}
 	stored = 0;
 	offset = filp->f_pos & (sb->s_blocksize - 1);

 	while (!error && !stored && filp->f_pos < inode->i_size) {
-		ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
-		struct buffer_head map_bh;
+		struct ext4_map_blocks map;
 		struct buffer_head *bh = NULL;

-		map_bh.b_state = 0;
-		err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
+		map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
+		map.m_len = 1;
+		err = ext4_map_blocks(NULL, inode, &map, 0);
 		if (err > 0) {
-			pgoff_t index = map_bh.b_blocknr >>
+			pgoff_t index = map.m_pblk >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
 			if (!ra_has_index(&filp->f_ra, index))
 				page_cache_sync_readahead(
@@ -143,7 +142,7 @@ static int ext4_readdir(struct file *filp,
 					&filp->f_ra, filp,
 					index, 1);
 			filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
-			bh = ext4_bread(NULL, inode, blk, 0, &err);
+			bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);
 		}

 		/*
@@ -152,9 +151,8 @@ static int ext4_readdir(struct file *filp,
 		 */
 		if (!bh) {
 			if (!dir_has_error) {
-				ext4_error(sb, "directory #%lu "
+				EXT4_ERROR_INODE(inode, "directory "
 					   "contains a hole at offset %Lu",
-					   inode->i_ino,
 					   (unsigned long long) filp->f_pos);
 				dir_has_error = 1;
 			}
...
@@ -29,6 +29,9 @@
 #include <linux/wait.h>
 #include <linux/blockgroup_lock.h>
 #include <linux/percpu_counter.h>
+#ifdef __KERNEL__
+#include <linux/compat.h>
+#endif

 /*
  * The fourth extended filesystem constants/structures
@@ -54,10 +57,10 @@
 #endif

 #define EXT4_ERROR_INODE(inode, fmt, a...) \
-	ext4_error_inode(__func__, (inode), (fmt), ## a);
+	ext4_error_inode(__func__, (inode), (fmt), ## a)

 #define EXT4_ERROR_FILE(file, fmt, a...)	\
-	ext4_error_file(__func__, (file), (fmt), ## a);
+	ext4_error_file(__func__, (file), (fmt), ## a)

 /* data type for block offset of block group */
 typedef int ext4_grpblk_t;
@@ -72,7 +75,7 @@ typedef __u32 ext4_lblk_t;
 typedef unsigned int ext4_group_t;

 /*
  * Flags used in mballoc's allocation_context flags field.
- * 
+ *
  * Also used to show what's going on for debugging purposes when the
  * flag field is exported via the traceport interface
@@ -125,6 +128,29 @@ struct ext4_allocation_request {
 	unsigned int flags;
 };

+/*
+ * Logical to physical block mapping, used by ext4_map_blocks()
+ *
+ * This structure is used to pass requests into ext4_map_blocks() as
+ * well as to store the information returned by ext4_map_blocks().  It
+ * takes less room on the stack than a struct buffer_head.
+ */
+#define EXT4_MAP_NEW		(1 << BH_New)
+#define EXT4_MAP_MAPPED		(1 << BH_Mapped)
+#define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
+#define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
+#define EXT4_MAP_UNINIT		(1 << BH_Uninit)
+#define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
+				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
+				 EXT4_MAP_UNINIT)
+
+struct ext4_map_blocks {
+	ext4_fsblk_t m_pblk;
+	ext4_lblk_t m_lblk;
+	unsigned int m_len;
+	unsigned int m_flags;
+};
+
 /*
  * For delayed allocation tracking
  */
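Since each EXT4_MAP_* flag is defined as (1 << BH_*), a mapping result can be OR-ed straight into a buffer_head's b_state by compatibility wrappers, and EXT4_MAP_FLAGS is the mask of all of them. A hypothetical caller of the new interface (use_blocks(), lblk, len, handle and flags are placeholders, not names from the patch):

        struct ext4_map_blocks map;
        int ret;

        map.m_lblk = lblk;      /* in: first logical block */
        map.m_len = len;        /* in: number of blocks */
        ret = ext4_map_blocks(handle, inode, &map, flags);
        if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED))
                /* out: m_pblk holds the physical block; EXT4_MAP_NEW
                 * is set when the blocks were freshly allocated */
                use_blocks(map.m_pblk, map.m_len);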
@@ -321,6 +347,83 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
 	return flags & EXT4_OTHER_FLMASK;
 }

+/*
+ * Inode flags used for atomic set/get
+ */
+enum {
+	EXT4_INODE_SECRM	= 0,	/* Secure deletion */
+	EXT4_INODE_UNRM		= 1,	/* Undelete */
+	EXT4_INODE_COMPR	= 2,	/* Compress file */
+	EXT4_INODE_SYNC		= 3,	/* Synchronous updates */
+	EXT4_INODE_IMMUTABLE	= 4,	/* Immutable file */
+	EXT4_INODE_APPEND	= 5,	/* writes to file may only append */
+	EXT4_INODE_NODUMP	= 6,	/* do not dump file */
+	EXT4_INODE_NOATIME	= 7,	/* do not update atime */
+/* Reserved for compression usage... */
+	EXT4_INODE_DIRTY	= 8,
+	EXT4_INODE_COMPRBLK	= 9,	/* One or more compressed clusters */
+	EXT4_INODE_NOCOMPR	= 10,	/* Don't compress */
+	EXT4_INODE_ECOMPR	= 11,	/* Compression error */
+/* End compression flags --- maybe not all used */
+	EXT4_INODE_INDEX	= 12,	/* hash-indexed directory */
+	EXT4_INODE_IMAGIC	= 13,	/* AFS directory */
+	EXT4_INODE_JOURNAL_DATA	= 14,	/* file data should be journaled */
+	EXT4_INODE_NOTAIL	= 15,	/* file tail should not be merged */
+	EXT4_INODE_DIRSYNC	= 16,	/* dirsync behaviour (directories only) */
+	EXT4_INODE_TOPDIR	= 17,	/* Top of directory hierarchies*/
+	EXT4_INODE_HUGE_FILE	= 18,	/* Set to each huge file */
+	EXT4_INODE_EXTENTS	= 19,	/* Inode uses extents */
+	EXT4_INODE_EA_INODE	= 21,	/* Inode used for large EA */
+	EXT4_INODE_EOFBLOCKS	= 22,	/* Blocks allocated beyond EOF */
+	EXT4_INODE_RESERVED	= 31,	/* reserved for ext4 lib */
+};
+
+#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
+#define CHECK_FLAG_VALUE(FLAG) if (!TEST_FLAG_VALUE(FLAG)) { \
+	printk(KERN_EMERG "EXT4 flag fail: " #FLAG ": %d %d\n", \
+		EXT4_##FLAG##_FL, EXT4_INODE_##FLAG); BUG_ON(1); }
+
+/*
+ * Since it's pretty easy to mix up bit numbers and hex values, and we
+ * can't do a compile-time test for ENUM values, we use a run-time
+ * test to make sure that EXT4_XXX_FL is consistent with respect to
+ * EXT4_INODE_XXX.  If all is well the printk and BUG_ON will all drop
+ * out so it won't cost any extra space in the compiled kernel image.
+ * But it's important that these values are the same, since we are
+ * using EXT4_INODE_XXX to test for the flag values, but EXT4_XX_FL
+ * must be consistent with the values of FS_XXX_FL defined in
+ * include/linux/fs.h and the on-disk values found in ext2, ext3, and
+ * ext4 filesystems, and of course the values defined in e2fsprogs.
+ *
+ * It's not paranoia if the Murphy's Law really *is* out to get you.  :-)
+ */
+static inline void ext4_check_flag_values(void)
+{
+	CHECK_FLAG_VALUE(SECRM);
+	CHECK_FLAG_VALUE(UNRM);
+	CHECK_FLAG_VALUE(COMPR);
+	CHECK_FLAG_VALUE(SYNC);
+	CHECK_FLAG_VALUE(IMMUTABLE);
+	CHECK_FLAG_VALUE(APPEND);
+	CHECK_FLAG_VALUE(NODUMP);
+	CHECK_FLAG_VALUE(NOATIME);
+	CHECK_FLAG_VALUE(DIRTY);
+	CHECK_FLAG_VALUE(COMPRBLK);
+	CHECK_FLAG_VALUE(NOCOMPR);
+	CHECK_FLAG_VALUE(ECOMPR);
+	CHECK_FLAG_VALUE(INDEX);
+	CHECK_FLAG_VALUE(IMAGIC);
+	CHECK_FLAG_VALUE(JOURNAL_DATA);
+	CHECK_FLAG_VALUE(NOTAIL);
+	CHECK_FLAG_VALUE(DIRSYNC);
+	CHECK_FLAG_VALUE(TOPDIR);
+	CHECK_FLAG_VALUE(HUGE_FILE);
+	CHECK_FLAG_VALUE(EXTENTS);
+	CHECK_FLAG_VALUE(EA_INODE);
+	CHECK_FLAG_VALUE(EOFBLOCKS);
+	CHECK_FLAG_VALUE(RESERVED);
+}
+
 /* Used to pass group descriptor data when online resize is done */
 struct ext4_new_group_input {
 	__u32 group;		/* Group number for this data */
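As the comment in the hunk above says, TEST_FLAG_VALUE() is a compile-time constant, so whenever the enum bit number and the EXT4_*_FL hex value agree the whole CHECK_FLAG_VALUE() body is dead code and ext4_check_flag_values() compiles to an empty function. For illustration, CHECK_FLAG_VALUE(INDEX) expands roughly to:

        if (!(EXT4_INDEX_FL == (1 << EXT4_INODE_INDEX))) {
                /* condition is constant 0 when the values agree, so
                 * the compiler drops this whole block */
                printk(KERN_EMERG "EXT4 flag fail: INDEX: %d %d\n",
                       EXT4_INDEX_FL, EXT4_INODE_INDEX);
                BUG_ON(1);
        }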
@@ -332,6 +435,18 @@ struct ext4_new_group_input {
 	__u16 unused;
 };

+#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
+struct compat_ext4_new_group_input {
+	u32 group;
+	compat_u64 block_bitmap;
+	compat_u64 inode_bitmap;
+	compat_u64 inode_table;
+	u32 blocks_count;
+	u16 reserved_blocks;
+	u16 unused;
+};
+#endif
+
 /* The struct ext4_new_group_input in kernel space, with free_blocks_count */
 struct ext4_new_group_data {
 	__u32 group;
@@ -355,7 +470,7 @@ struct ext4_new_group_data {
 #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT	(EXT4_GET_BLOCKS_UNINIT_EXT|\
 						 EXT4_GET_BLOCKS_CREATE)
 	/* Caller is from the delayed allocation writeout path,
-	   so set the magic i_delalloc_reserve_flag after taking the 
+	   so set the magic i_delalloc_reserve_flag after taking the
 	   inode allocation semaphore for */
 #define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0004
 	/* caller is from the direct IO path, request to creation of an
@@ -398,6 +513,7 @@ struct ext4_new_group_data {
 #define EXT4_IOC_ALLOC_DA_BLKS		_IO('f', 12)
 #define EXT4_IOC_MOVE_EXT		_IOWR('f', 15, struct move_extent)

+#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
  * ioctl commands in 32 bit emulation
  */
@@ -408,11 +524,13 @@ struct ext4_new_group_data {
 #define EXT4_IOC32_GETRSVSZ		_IOR('f', 5, int)
 #define EXT4_IOC32_SETRSVSZ		_IOW('f', 6, int)
 #define EXT4_IOC32_GROUP_EXTEND		_IOW('f', 7, unsigned int)
+#define EXT4_IOC32_GROUP_ADD		_IOW('f', 8, struct compat_ext4_new_group_input)
 #ifdef CONFIG_JBD2_DEBUG
 #define EXT4_IOC32_WAIT_FOR_READONLY	_IOR('f', 99, int)
 #endif
 #define EXT4_IOC32_GETVERSION_OLD	FS_IOC32_GETVERSION
 #define EXT4_IOC32_SETVERSION_OLD	FS_IOC32_SETVERSION
+#endif
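The separate compat struct and ioctl number exist because 32-bit userland aligns u64 fields to 4 bytes, so the structure layout, and therefore the size encoded by _IOW(), differs between the 32-bit and 64-bit ABIs. A hypothetical compat handler (name and error handling are illustrative, not the patch's code) would translate field by field before entering the native EXT4_IOC_GROUP_ADD path:

        static int group_add_compat(struct inode *inode,
                                    struct compat_ext4_new_group_input __user *u)
        {
                struct ext4_new_group_input input;

                if (get_user(input.group, &u->group) ||
                    get_user(input.block_bitmap, &u->block_bitmap) ||
                    get_user(input.inode_bitmap, &u->inode_bitmap) ||
                    get_user(input.inode_table, &u->inode_table) ||
                    get_user(input.blocks_count, &u->blocks_count) ||
                    get_user(input.reserved_blocks, &u->reserved_blocks))
                        return -EFAULT;
                /* ... hand `input` to the regular group-add code ... */
                return 0;
        }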
 /*
@@ -616,9 +734,8 @@ struct ext4_ext_cache {
  */
 struct ext4_inode_info {
 	__le32	i_data[15];	/* unconverted */
-	__u32	i_flags;
-	ext4_fsblk_t	i_file_acl;
 	__u32	i_dtime;
+	ext4_fsblk_t	i_file_acl;

 	/*
 	 * i_block_group is the number of the block group which contains
@@ -629,6 +746,7 @@ struct ext4_inode_info {
 	 */
 	ext4_group_t	i_block_group;
 	unsigned long	i_state_flags;		/* Dynamic state flags */
+	unsigned long	i_flags;

 	ext4_lblk_t		i_dir_start_lookup;
 #ifdef CONFIG_EXT4_FS_XATTR
@@ -1062,22 +1180,25 @@ enum {
 	EXT4_STATE_DA_ALLOC_CLOSE,	/* Alloc DA blks on close */
 	EXT4_STATE_EXT_MIGRATE,		/* Inode is migrating */
 	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
+	EXT4_STATE_NEWENTRY,		/* File just added to dir */
 };

-static inline int ext4_test_inode_state(struct inode *inode, int bit)
-{
-	return test_bit(bit, &EXT4_I(inode)->i_state_flags);
-}
-
-static inline void ext4_set_inode_state(struct inode *inode, int bit)
-{
-	set_bit(bit, &EXT4_I(inode)->i_state_flags);
-}
-
-static inline void ext4_clear_inode_state(struct inode *inode, int bit)
-{
-	clear_bit(bit, &EXT4_I(inode)->i_state_flags);
-}
+#define EXT4_INODE_BIT_FNS(name, field)					\
+static inline int ext4_test_inode_##name(struct inode *inode, int bit)	\
+{									\
+	return test_bit(bit, &EXT4_I(inode)->i_##field);		\
+}									\
+static inline void ext4_set_inode_##name(struct inode *inode, int bit)	\
+{									\
+	set_bit(bit, &EXT4_I(inode)->i_##field);			\
+}									\
+static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
+{									\
+	clear_bit(bit, &EXT4_I(inode)->i_##field);			\
+}

+EXT4_INODE_BIT_FNS(flag, flags)
+EXT4_INODE_BIT_FNS(state, state_flags)
 #else
 /* Assume that user mode programs are passing in an ext4fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
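EXT4_INODE_BIT_FNS(flag, flags) pastes its arguments into both the function names and the field name, so it generates, illustratively, the three accessors below. This is also why i_flags became an unsigned long in the ext4_inode_info hunk above: the atomic bitops (test_bit/set_bit/clear_bit) operate on unsigned long words.

        static inline int ext4_test_inode_flag(struct inode *inode, int bit)
        {
                return test_bit(bit, &EXT4_I(inode)->i_flags);
        }
        static inline void ext4_set_inode_flag(struct inode *inode, int bit)
        {
                set_bit(bit, &EXT4_I(inode)->i_flags);
        }
        static inline void ext4_clear_inode_flag(struct inode *inode, int bit)
        {
                clear_bit(bit, &EXT4_I(inode)->i_flags);
        }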
@@ -1264,7 +1385,7 @@ struct ext4_dir_entry_2 {
 #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
 				      EXT4_FEATURE_COMPAT_DIR_INDEX) && \
-		    (EXT4_I(dir)->i_flags & EXT4_INDEX_FL))
+		    ext4_test_inode_flag((dir), EXT4_INODE_INDEX))
 #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
 #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
@@ -1678,6 +1799,7 @@ struct ext4_group_info {
 	ext4_grpblk_t	bb_first_free;	/* first free block */
 	ext4_grpblk_t	bb_free;	/* total free blocks */
 	ext4_grpblk_t	bb_fragments;	/* nr of freespace fragments */
+	ext4_grpblk_t	bb_largest_free_order;/* order of largest frag in BG */
 	struct		list_head bb_prealloc_list;
 #ifdef DOUBLE_CHECK
 	void		*bb_bitmap;
@@ -1772,9 +1894,8 @@ extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
 extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
 				       int chunk);
-extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
-			       ext4_lblk_t iblock, unsigned int max_blocks,
-			       struct buffer_head *bh_result, int flags);
+extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
+			       struct ext4_map_blocks *map, int flags);
 extern void ext4_ext_truncate(struct inode *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
@@ -1782,6 +1903,8 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
 			  loff_t len);
 extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 			  ssize_t len);
+extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
+			   struct ext4_map_blocks *map, int flags);
 extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
 			   sector_t block, unsigned int max_blocks,
 			   struct buffer_head *bh, int flags);
...
@@ -273,7 +273,7 @@ static inline int ext4_should_journal_data(struct inode *inode)
 		return 1;
 	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
 		return 1;
-	if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+	if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
 		return 1;
 	return 0;
 }
@@ -284,7 +284,7 @@ static inline int ext4_should_order_data(struct inode *inode)
 		return 0;
 	if (!S_ISREG(inode->i_mode))
 		return 0;
-	if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+	if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
 		return 0;
 	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
 		return 1;
@@ -297,7 +297,7 @@ static inline int ext4_should_writeback_data(struct inode *inode)
 		return 0;
 	if (EXT4_JOURNAL(inode) == NULL)
 		return 1;
-	if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+	if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
 		return 0;
 	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
 		return 1;
@@ -321,7 +321,7 @@ static inline int ext4_should_dioread_nolock(struct inode *inode)
 		return 0;
 	if (!S_ISREG(inode->i_mode))
 		return 0;
-	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return 0;
 	if (ext4_should_journal_data(inode))
 		return 0;
...
@@ -107,11 +107,8 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle,
 	if (err <= 0)
 		return err;
 	err = ext4_truncate_restart_trans(handle, inode, needed);
-	/*
-	 * We have dropped i_data_sem so someone might have cached again
-	 * an extent we are going to truncate.
-	 */
-	ext4_ext_invalidate_cache(inode);
+	if (err == 0)
+		err = -EAGAIN;

 	return err;
 }
@@ -185,10 +182,10 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 	if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
 		/*
 		 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
-		 * block groups per flexgroup, reserve the first block 
-		 * group for directories and special files.  Regular 
-		 * files will start at the second block group.  This 
-		 * tends to speed up directory access and improves 
+		 * block groups per flexgroup, reserve the first block
+		 * group for directories and special files.  Regular
+		 * files will start at the second block group.  This
+		 * tends to speed up directory access and improves
 		 * fsck times.
 		 */
 		block_group &= ~(flex_size-1);
@@ -439,10 +436,10 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
 	return 0;

 corrupted:
-	__ext4_error(inode->i_sb, function,
-			"bad header/extent in inode #%lu: %s - magic %x, "
-			"entries %u, max %u(%u), depth %u(%u)",
-			inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
+	ext4_error_inode(function, inode,
+			"bad header/extent: %s - magic %x, "
+			"entries %u, max %u(%u), depth %u(%u)",
+			error_msg, le16_to_cpu(eh->eh_magic),
 			le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
 			max, le16_to_cpu(eh->eh_depth), depth);
@@ -1622,9 +1619,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
 		merge_done = 1;
 		WARN_ON(eh->eh_entries == 0);
 		if (!eh->eh_entries)
-			ext4_error(inode->i_sb,
-				   "inode#%lu, eh->eh_entries = 0!",
-				   inode->i_ino);
+			EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
 	}

 	return merge_done;
@@ -2039,7 +2034,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
 	struct ext4_ext_cache *cex;
 	int ret = EXT4_EXT_CACHE_NO;

-	/* 
+	/*
 	 * We borrow i_block_reservation_lock to protect i_cached_extent
 	 */
 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2361,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
 	int depth = ext_depth(inode);
 	struct ext4_ext_path *path;
 	handle_t *handle;
-	int i = 0, err = 0;
+	int i, err;

 	ext_debug("truncate since %u\n", start);
@@ -2370,23 +2365,26 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);

+again:
 	ext4_ext_invalidate_cache(inode);

 	/*
 	 * We start scanning from right side, freeing all the blocks
 	 * after i_size and walking into the tree depth-wise.
 	 */
+	depth = ext_depth(inode);
 	path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
 	if (path == NULL) {
 		ext4_journal_stop(handle);
 		return -ENOMEM;
 	}
+	path[0].p_depth = depth;
 	path[0].p_hdr = ext_inode_hdr(inode);
 	if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
 		err = -EIO;
 		goto out;
 	}
-	path[0].p_depth = depth;
+	i = err = 0;

 	while (i >= 0 && err == 0) {
 		if (i == depth) {
@@ -2480,6 +2478,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
 out:
 	ext4_ext_drop_refs(path);
 	kfree(path);
+	if (err == -EAGAIN)
+		goto again;
 	ext4_journal_stop(handle);

 	return err;
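Together with the ext4_ext_truncate_extend_restart() change earlier in this file, these two hunks implement the restart protocol promised by the "restart ext4_ext_remove_space() after transaction restart" commit: a transaction restart deep in the tree walk now surfaces as -EAGAIN, and the top level throws away its possibly stale path and extent cache and rescans from scratch. Condensed control flow, for illustration only:

        again:
                ext4_ext_invalidate_cache(inode);
                /* ... walk the tree right to left, removing extents; a
                 * transaction restart inside the walk returns -EAGAIN ... */
        out:
                ext4_ext_drop_refs(path);
                kfree(path);
                if (err == -EAGAIN)
                        goto again;     /* re-read the tree from scratch */
                ext4_journal_stop(handle);
                return err;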
@@ -2544,7 +2544,7 @@ static void bi_complete(struct bio *bio, int error)
 /* FIXME!! we need to try to merge to left or right after zero-out  */
 static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 {
-	int ret = -EIO;
+	int ret;
 	struct bio *bio;
 	int blkbits, blocksize;
 	sector_t ee_pblock;
@@ -2568,6 +2568,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 			len = ee_len;

 		bio = bio_alloc(GFP_NOIO, len);
+		if (!bio)
+			return -ENOMEM;
+
 		bio->bi_sector = ee_pblock;
 		bio->bi_bdev   = inode->i_sb->s_bdev;
@@ -2595,22 +2598,20 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 		submit_bio(WRITE, bio);
 		wait_for_completion(&event);

-		if (test_bit(BIO_UPTODATE, &bio->bi_flags))
-			ret = 0;
-		else {
-			ret = -EIO;
-			break;
+		if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+			bio_put(bio);
+			return -EIO;
 		}
 		bio_put(bio);
 		ee_len    -= done;
 		ee_pblock += done << (blkbits - 9);
 	}
-	return ret;
+	return 0;
 }
 #define EXT4_EXT_ZERO_LEN 7
 /*
- * This function is called by ext4_ext_get_blocks() if someone tries to write
+ * This function is called by ext4_ext_map_blocks() if someone tries to write
  * to an uninitialized extent. It may result in splitting the uninitialized
  * extent into multiple extents (upto three - one initialized and two
  * uninitialized).
@@ -2620,39 +2621,55 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 *   c> Splits in three extents: Somone is writing in middle of the extent
 */
 static int ext4_ext_convert_to_initialized(handle_t *handle,
 						struct inode *inode,
-						struct ext4_ext_path *path,
-						ext4_lblk_t iblock,
-						unsigned int max_blocks)
+						struct ext4_map_blocks *map,
+						struct ext4_ext_path *path)
 {
 	struct ext4_extent *ex, newex, orig_ex;
 	struct ext4_extent *ex1 = NULL;
 	struct ext4_extent *ex2 = NULL;
 	struct ext4_extent *ex3 = NULL;
 	struct ext4_extent_header *eh;
-	ext4_lblk_t ee_block;
+	ext4_lblk_t ee_block, eof_block;
 	unsigned int allocated, ee_len, depth;
 	ext4_fsblk_t newblock;
 	int err = 0;
 	int ret = 0;
+	int may_zeroout;
+
+	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
+		"block %llu, max_blocks %u\n", inode->i_ino,
+		(unsigned long long)map->m_lblk, map->m_len);
+
+	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+		inode->i_sb->s_blocksize_bits;
+	if (eof_block < map->m_lblk + map->m_len)
+		eof_block = map->m_lblk + map->m_len;

 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
-	allocated = ee_len - (iblock - ee_block);
-	newblock = iblock - ee_block + ext_pblock(ex);
+	allocated = ee_len - (map->m_lblk - ee_block);
+	newblock = map->m_lblk - ee_block + ext_pblock(ex);
+
 	ex2 = ex;
 	orig_ex.ee_block = ex->ee_block;
 	orig_ex.ee_len   = cpu_to_le16(ee_len);
 	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));

+	/*
+	 * It is safe to convert extent to initialized via explicit
+	 * zeroout only if extent is fully insde i_size or new_size.
+	 */
+	may_zeroout = ee_block + ee_len <= eof_block;
+
 	err = ext4_ext_get_access(handle, inode, path + depth);
 	if (err)
 		goto out;
 	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-	if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+	if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
 		err = ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
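The may_zeroout guard computed above deserves a worked example. With hypothetical numbers (not from the patch):

        /* blocksize = 4096  ->  s_blocksize_bits = 12
         * i_size    = 10000 ->  eof_block = (10000 + 4095) >> 12 = 3
         * (assuming the write region itself ends at or before block 3)
         *
         * An uninitialized extent [ee_block = 0, ee_len = 5]:
         * 0 + 5 <= 3 is false, so may_zeroout = 0 and the function must
         * take the split path; blocks past EOF stay uninitialized
         * instead of being zeroed out wholesale. */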
@@ -2665,10 +2682,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		return allocated;
 	}

-	/* ex1: ee_block to iblock - 1 : uninitialized */
-	if (iblock > ee_block) {
+	/* ex1: ee_block to map->m_lblk - 1 : uninitialized */
+	if (map->m_lblk > ee_block) {
 		ex1 = ex;
-		ex1->ee_len = cpu_to_le16(iblock - ee_block);
+		ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
 		ext4_ext_mark_uninitialized(ex1);
 		ex2 = &newex;
 	}
@@ -2677,15 +2694,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	 * we insert ex3, if ex1 is NULL. This is to avoid temporary
 	 * overlap of blocks.
 	 */
-	if (!ex1 && allocated > max_blocks)
-		ex2->ee_len = cpu_to_le16(max_blocks);
+	if (!ex1 && allocated > map->m_len)
+		ex2->ee_len = cpu_to_le16(map->m_len);
 	/* ex3: to ee_block + ee_len : uninitialised */
-	if (allocated > max_blocks) {
+	if (allocated > map->m_len) {
 		unsigned int newdepth;
 		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
-		if (allocated <= EXT4_EXT_ZERO_LEN) {
+		if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
 			/*
-			 * iblock == ee_block is handled by the zerouout
+			 * map->m_lblk == ee_block is handled by the zerouout
 			 * at the beginning.
 			 * Mark first half uninitialized.
 			 * Mark second half initialized and zero out the
@@ -2698,7 +2715,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 			ext4_ext_dirty(handle, inode, path + depth);

 			ex3 = &newex;
-			ex3->ee_block = cpu_to_le32(iblock);
+			ex3->ee_block = cpu_to_le32(map->m_lblk);
 			ext4_ext_store_pblock(ex3, newblock);
 			ex3->ee_len = cpu_to_le16(allocated);
 			err = ext4_ext_insert_extent(handle, inode, path,
@@ -2711,7 +2728,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 				ex->ee_len   = orig_ex.ee_len;
 				ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
 				ext4_ext_dirty(handle, inode, path + depth);
-				/* blocks available from iblock */
+				/* blocks available from map->m_lblk */
 				return allocated;

 			} else if (err)
@@ -2733,8 +2750,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 			 */
 			depth = ext_depth(inode);
 			ext4_ext_drop_refs(path);
-			path = ext4_ext_find_extent(inode,
-							iblock, path);
+			path = ext4_ext_find_extent(inode, map->m_lblk,
+						    path);
 			if (IS_ERR(path)) {
 				err = PTR_ERR(path);
 				return err;
@@ -2754,12 +2771,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 				return allocated;
 			}
 			ex3 = &newex;
-			ex3->ee_block = cpu_to_le32(iblock + max_blocks);
-			ext4_ext_store_pblock(ex3, newblock + max_blocks);
-			ex3->ee_len = cpu_to_le16(allocated - max_blocks);
+			ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
+			ext4_ext_store_pblock(ex3, newblock + map->m_len);
+			ex3->ee_len = cpu_to_le16(allocated - map->m_len);
 			ext4_ext_mark_uninitialized(ex3);
 			err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
-			if (err == -ENOSPC) {
+			if (err == -ENOSPC && may_zeroout) {
 				err =  ext4_ext_zeroout(inode, &orig_ex);
 				if (err)
 					goto fix_extent_len;
@@ -2769,7 +2786,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 				ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
 				ext4_ext_dirty(handle, inode, path + depth);
 				/* zeroed the full extent */
-				/* blocks available from iblock */
+				/* blocks available from map->m_lblk */
 				return allocated;

 			} else if (err)
@@ -2783,11 +2800,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 			 * update the extent length after successful insert of the
 			 * split extent
 			 */
-			orig_ex.ee_len = cpu_to_le16(ee_len -
-						ext4_ext_get_actual_len(ex3));
+			ee_len -= ext4_ext_get_actual_len(ex3);
+			orig_ex.ee_len = cpu_to_le16(ee_len);
+			may_zeroout = ee_block + ee_len <= eof_block;
+
 			depth = newdepth;
 			ext4_ext_drop_refs(path);
-			path = ext4_ext_find_extent(inode, iblock, path);
+			path = ext4_ext_find_extent(inode, map->m_lblk, path);
 			if (IS_ERR(path)) {
 				err = PTR_ERR(path);
 				goto out;
@@ -2801,14 +2820,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 			if (err)
 				goto out;

-			allocated = max_blocks;
+			allocated = map->m_len;

 			/* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
 			 * to insert a extent in the middle zerout directly
 			 * otherwise give the extent a chance to merge to left
 			 */
 			if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
-				iblock != ee_block) {
+				map->m_lblk != ee_block && may_zeroout) {
 				err =  ext4_ext_zeroout(inode, &orig_ex);
 				if (err)
 					goto fix_extent_len;
@@ -2818,7 +2837,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 				ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
 				ext4_ext_dirty(handle, inode, path + depth);
 				/* zero out the first half */
-				/* blocks available from iblock */
+				/* blocks available from map->m_lblk */
 				return allocated;
 			}
 		}
@@ -2829,12 +2848,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	 */
 	if (ex1 && ex1 != ex) {
 		ex1 = ex;
-		ex1->ee_len = cpu_to_le16(iblock - ee_block);
+		ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
 		ext4_ext_mark_uninitialized(ex1);
 		ex2 = &newex;
 	}
-	/* ex2: iblock to iblock + maxblocks-1 : initialised */
-	ex2->ee_block = cpu_to_le32(iblock);
+	/* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */
+	ex2->ee_block = cpu_to_le32(map->m_lblk);
 	ext4_ext_store_pblock(ex2, newblock);
 	ex2->ee_len = cpu_to_le16(allocated);
 	if (ex2 != ex)
@@ -2877,7 +2896,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	goto out;
 insert:
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
-	if (err == -ENOSPC) {
+	if (err == -ENOSPC && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
@@ -2904,7 +2923,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 }

 /*
- * This function is called by ext4_ext_get_blocks() from
+ * This function is called by ext4_ext_map_blocks() from
 * ext4_get_blocks_dio_write() when DIO to write
 * to an uninitialized extent.
 *
@@ -2927,9 +2946,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 */
 static int ext4_split_unwritten_extents(handle_t *handle,
 					struct inode *inode,
+					struct ext4_map_blocks *map,
 					struct ext4_ext_path *path,
-					ext4_lblk_t iblock,
-					unsigned int max_blocks,
 					int flags)
 {
 	struct ext4_extent *ex, newex, orig_ex;
@@ -2937,41 +2955,55 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	struct ext4_extent *ex2 = NULL;
 	struct ext4_extent *ex3 = NULL;
 	struct ext4_extent_header *eh;
-	ext4_lblk_t ee_block;
+	ext4_lblk_t ee_block, eof_block;
 	unsigned int allocated, ee_len, depth;
 	ext4_fsblk_t newblock;
 	int err = 0;
+	int may_zeroout;
+
+	ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
+		"block %llu, max_blocks %u\n", inode->i_ino,
+		(unsigned long long)map->m_lblk, map->m_len);

+	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+		inode->i_sb->s_blocksize_bits;
+	if (eof_block < map->m_lblk + map->m_len)
+		eof_block = map->m_lblk + map->m_len;

-	ext_debug("ext4_split_unwritten_extents: inode %lu,"
-		"iblock %llu, max_blocks %u\n", inode->i_ino,
-		(unsigned long long)iblock, max_blocks);
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
-	allocated = ee_len - (iblock - ee_block);
-	newblock = iblock - ee_block + ext_pblock(ex);
+	allocated = ee_len - (map->m_lblk - ee_block);
+	newblock = map->m_lblk - ee_block + ext_pblock(ex);
+
 	ex2 = ex;
 	orig_ex.ee_block = ex->ee_block;
 	orig_ex.ee_len   = cpu_to_le16(ee_len);
 	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));

+	/*
+	 * It is safe to convert extent to initialized via explicit
+	 * zeroout only if extent is fully insde i_size or new_size.
+	 */
+	may_zeroout = ee_block + ee_len <= eof_block;
+
 	/*
 	 * If the uninitialized extent begins at the same logical
 	 * block where the write begins, and the write completely
 	 * covers the extent, then we don't need to split it.
 	 */
-	if ((iblock == ee_block) && (allocated <= max_blocks))
+	if ((map->m_lblk == ee_block) && (allocated <= map->m_len))
 		return allocated;

 	err = ext4_ext_get_access(handle, inode, path + depth);
 	if (err)
 		goto out;
-	/* ex1: ee_block to iblock - 1 : uninitialized */
-	if (iblock > ee_block) {
+	/* ex1: ee_block to map->m_lblk - 1 : uninitialized */
+	if (map->m_lblk > ee_block) {
 		ex1 = ex;
-		ex1->ee_len = cpu_to_le16(iblock - ee_block);
+		ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
 		ext4_ext_mark_uninitialized(ex1);
 		ex2 = &newex;
 	}
@@ -2980,18 +3012,18 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	 * we insert ex3, if ex1 is NULL. This is to avoid temporary
 	 * overlap of blocks.
 	 */
-	if (!ex1 && allocated > max_blocks)
-		ex2->ee_len = cpu_to_le16(max_blocks);
+	if (!ex1 && allocated > map->m_len)
+		ex2->ee_len = cpu_to_le16(map->m_len);
 	/* ex3: to ee_block + ee_len : uninitialised */
-	if (allocated > max_blocks) {
+	if (allocated > map->m_len) {
 		unsigned int newdepth;
 		ex3 = &newex;
-		ex3->ee_block = cpu_to_le32(iblock + max_blocks);
-		ext4_ext_store_pblock(ex3, newblock + max_blocks);
-		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
+		ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
+		ext4_ext_store_pblock(ex3, newblock + map->m_len);
+		ex3->ee_len = cpu_to_le16(allocated - map->m_len);
 		ext4_ext_mark_uninitialized(ex3);
 		err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
-		if (err == -ENOSPC) {
+		if (err == -ENOSPC && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -3001,7 +3033,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
 			ext4_ext_dirty(handle, inode, path + depth);
 			/* zeroed the full extent */
-			/* blocks available from iblock */
+			/* blocks available from map->m_lblk */
 			return allocated;

 		} else if (err)
@@ -3015,11 +3047,13 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 		 * update the extent length after successful insert of the
 		 * split extent
 		 */
-		orig_ex.ee_len = cpu_to_le16(ee_len -
-						ext4_ext_get_actual_len(ex3));
+		ee_len -= ext4_ext_get_actual_len(ex3);
+		orig_ex.ee_len = cpu_to_le16(ee_len);
+		may_zeroout = ee_block + ee_len <= eof_block;
+
 		depth = newdepth;
 		ext4_ext_drop_refs(path);
-		path = ext4_ext_find_extent(inode, iblock, path);
+		path = ext4_ext_find_extent(inode, map->m_lblk, path);
 		if (IS_ERR(path)) {
 			err = PTR_ERR(path);
 			goto out;
@@ -3033,7 +3067,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 		if (err)
 			goto out;

-		allocated = max_blocks;
+		allocated = map->m_len;
 	}
 	/*
 	 * If there was a change of depth as part of the
@@ -3042,15 +3076,15 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	 */
 	if (ex1 && ex1 != ex) {
 		ex1 = ex;
-		ex1->ee_len = cpu_to_le16(iblock - ee_block);
+		ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
 		ext4_ext_mark_uninitialized(ex1);
 		ex2 = &newex;
 	}
 	/*
-	 * ex2: iblock to iblock + maxblocks-1 : to be direct IO written,
-	 * uninitialised still.
+	 * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written
+	 * using direct I/O, uninitialised still.
 	 */
-	ex2->ee_block = cpu_to_le32(iblock);
+	ex2->ee_block = cpu_to_le32(map->m_lblk);
 	ext4_ext_store_pblock(ex2, newblock);
 	ex2->ee_len = cpu_to_le16(allocated);
 	ext4_ext_mark_uninitialized(ex2);
@@ -3062,7 +3096,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	goto out;
 insert:
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
-	if (err == -ENOSPC) {
+	if (err == -ENOSPC && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
@@ -3152,10 +3186,9 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,

 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
-			ext4_lblk_t iblock, unsigned int max_blocks,
+			struct ext4_map_blocks *map,
 			struct ext4_ext_path *path, int flags,
-			unsigned int allocated, struct buffer_head *bh_result,
-			ext4_fsblk_t newblock)
+			unsigned int allocated, ext4_fsblk_t newblock)
 {
 	int ret = 0;
 	int err = 0;
@@ -3163,15 +3196,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,

 	ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"
 		  "block %llu, max_blocks %u, flags %d, allocated %u",
-		  inode->i_ino, (unsigned long long)iblock, max_blocks,
+		  inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
 		  flags, allocated);
 	ext4_ext_show_leaf(inode, path);
 	/* get_block() before submit the IO, split the extent */
 	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-		ret = ext4_split_unwritten_extents(handle,
-						inode, path, iblock,
-						max_blocks, flags);
+		ret = ext4_split_unwritten_extents(handle, inode, map,
+						   path, flags);
 		/*
 		 * Flag the inode(non aio case) or end_io struct (aio case)
 		 * that this IO needs to convertion to written when IO is
@@ -3182,7 +3214,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 		else
 			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
 		if (ext4_should_dioread_nolock(inode))
-			set_buffer_uninit(bh_result);
+			map->m_flags |= EXT4_MAP_UNINIT;
 		goto out;
 	}
 	/* IO end_io complete, convert the filled extent to written */
@@ -3210,14 +3242,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 			 * the buffer head will be unmapped so that
 			 * a read from the block returns 0s.
 			 */
-			set_buffer_unwritten(bh_result);
+			map->m_flags |= EXT4_MAP_UNWRITTEN;
 			goto out1;
 		}

 		/* buffered write, writepage time, convert*/
-		ret = ext4_ext_convert_to_initialized(handle, inode,
-							path, iblock,
-							max_blocks);
+		ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
 		if (ret >= 0)
 			ext4_update_inode_fsync_trans(handle, inode, 1);
 out:
@@ -3226,7 +3256,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 		goto out2;
 	} else
 		allocated = ret;
-	set_buffer_new(bh_result);
+	map->m_flags |= EXT4_MAP_NEW;
 	/*
 	 * if we allocated more blocks than requested
 	 * we need to make sure we unmap the extra block
@@ -3234,11 +3264,11 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 	 * unmapped later when we find the buffer_head marked
 	 * new.
 	 */
-	if (allocated > max_blocks) {
+	if (allocated > map->m_len) {
 		unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
-					newblock + max_blocks,
-					allocated - max_blocks);
-		allocated = max_blocks;
+					newblock + map->m_len,
+					allocated - map->m_len);
+		allocated = map->m_len;
 	}
 	/*
@@ -3252,13 +3282,13 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 		ext4_da_update_reserve_space(inode, allocated, 0);
 map_out:
-	set_buffer_mapped(bh_result);
+	map->m_flags |= EXT4_MAP_MAPPED;
 out1:
-	if (allocated > max_blocks)
-		allocated = max_blocks;
+	if (allocated > map->m_len)
+		allocated = map->m_len;
 	ext4_ext_show_leaf(inode, path);
-	bh_result->b_bdev = inode->i_sb->s_bdev;
-	bh_result->b_blocknr = newblock;
+	map->m_pblk = newblock;
+	map->m_len = allocated;
 out2:
 	if (path) {
 		ext4_ext_drop_refs(path);
@@ -3284,26 +3314,23 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 *
 * return < 0, error case.
 */
-int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
-			ext4_lblk_t iblock,
-			unsigned int max_blocks, struct buffer_head *bh_result,
-			int flags)
+int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
+			struct ext4_map_blocks *map, int flags)
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent_header *eh;
 	struct ext4_extent newex, *ex, *last_ex;
 	ext4_fsblk_t newblock;
-	int err = 0, depth, ret, cache_type;
+	int i, err = 0, depth, ret, cache_type;
 	unsigned int allocated = 0;
 	struct ext4_allocation_request ar;
 	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;

-	__clear_bit(BH_New, &bh_result->b_state);
 	ext_debug("blocks %u/%u requested for inode %lu\n",
-		  iblock, max_blocks, inode->i_ino);
+		  map->m_lblk, map->m_len, inode->i_ino);

 	/* check in cache */
-	cache_type = ext4_ext_in_cache(inode, iblock, &newex);
+	cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex);
 	if (cache_type) {
 		if (cache_type == EXT4_EXT_CACHE_GAP) {
 			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
...@@ -3316,12 +3343,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3316,12 +3343,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* we should allocate requested block */ /* we should allocate requested block */
} else if (cache_type == EXT4_EXT_CACHE_EXTENT) { } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
/* block is already allocated */ /* block is already allocated */
newblock = iblock newblock = map->m_lblk
- le32_to_cpu(newex.ee_block) - le32_to_cpu(newex.ee_block)
+ ext_pblock(&newex); + ext_pblock(&newex);
/* number of remaining blocks in the extent */ /* number of remaining blocks in the extent */
allocated = ext4_ext_get_actual_len(&newex) - allocated = ext4_ext_get_actual_len(&newex) -
(iblock - le32_to_cpu(newex.ee_block)); (map->m_lblk - le32_to_cpu(newex.ee_block));
goto out; goto out;
} else { } else {
BUG(); BUG();
...@@ -3329,7 +3356,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3329,7 +3356,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
} }
/* find extent for this block */ /* find extent for this block */
path = ext4_ext_find_extent(inode, iblock, NULL); path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
if (IS_ERR(path)) { if (IS_ERR(path)) {
err = PTR_ERR(path); err = PTR_ERR(path);
path = NULL; path = NULL;
...@@ -3345,8 +3372,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3345,8 +3372,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
*/ */
if (unlikely(path[depth].p_ext == NULL && depth != 0)) { if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
EXT4_ERROR_INODE(inode, "bad extent address " EXT4_ERROR_INODE(inode, "bad extent address "
"iblock: %d, depth: %d pblock %lld", "lblock: %lu, depth: %d pblock %lld",
iblock, depth, path[depth].p_block); (unsigned long) map->m_lblk, depth,
path[depth].p_block);
err = -EIO; err = -EIO;
goto out2; goto out2;
} }
...@@ -3364,12 +3392,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3364,12 +3392,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
*/ */
ee_len = ext4_ext_get_actual_len(ex); ee_len = ext4_ext_get_actual_len(ex);
/* if found extent covers block, simply return it */ /* if found extent covers block, simply return it */
if (in_range(iblock, ee_block, ee_len)) { if (in_range(map->m_lblk, ee_block, ee_len)) {
newblock = iblock - ee_block + ee_start; newblock = map->m_lblk - ee_block + ee_start;
/* number of remaining blocks in the extent */ /* number of remaining blocks in the extent */
allocated = ee_len - (iblock - ee_block); allocated = ee_len - (map->m_lblk - ee_block);
ext_debug("%u fit into %u:%d -> %llu\n", iblock, ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
ee_block, ee_len, newblock); ee_block, ee_len, newblock);
/* Do not put uninitialized extent in the cache */ /* Do not put uninitialized extent in the cache */
if (!ext4_ext_is_uninitialized(ex)) { if (!ext4_ext_is_uninitialized(ex)) {
...@@ -3379,8 +3407,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3379,8 +3407,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
goto out; goto out;
} }
ret = ext4_ext_handle_uninitialized_extents(handle, ret = ext4_ext_handle_uninitialized_extents(handle,
inode, iblock, max_blocks, path, inode, map, path, flags, allocated,
flags, allocated, bh_result, newblock); newblock);
return ret; return ret;
} }
} }
...@@ -3394,7 +3422,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3394,7 +3422,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* put just found gap into cache to speed up * put just found gap into cache to speed up
* subsequent requests * subsequent requests
*/ */
ext4_ext_put_gap_in_cache(inode, path, iblock); ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
goto out2; goto out2;
} }
/* /*
...@@ -3402,11 +3430,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3402,11 +3430,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
*/ */
/* find neighbour allocated blocks */ /* find neighbour allocated blocks */
ar.lleft = iblock; ar.lleft = map->m_lblk;
err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
if (err) if (err)
goto out2; goto out2;
ar.lright = iblock; ar.lright = map->m_lblk;
err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
if (err) if (err)
goto out2; goto out2;
...@@ -3417,26 +3445,26 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3417,26 +3445,26 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* EXT_INIT_MAX_LEN and for an uninitialized extent this limit is * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is
* EXT_UNINIT_MAX_LEN. * EXT_UNINIT_MAX_LEN.
*/ */
if (max_blocks > EXT_INIT_MAX_LEN && if (map->m_len > EXT_INIT_MAX_LEN &&
!(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
max_blocks = EXT_INIT_MAX_LEN; map->m_len = EXT_INIT_MAX_LEN;
else if (max_blocks > EXT_UNINIT_MAX_LEN && else if (map->m_len > EXT_UNINIT_MAX_LEN &&
(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
max_blocks = EXT_UNINIT_MAX_LEN; map->m_len = EXT_UNINIT_MAX_LEN;
/* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
newex.ee_block = cpu_to_le32(iblock); newex.ee_block = cpu_to_le32(map->m_lblk);
newex.ee_len = cpu_to_le16(max_blocks); newex.ee_len = cpu_to_le16(map->m_len);
err = ext4_ext_check_overlap(inode, &newex, path); err = ext4_ext_check_overlap(inode, &newex, path);
if (err) if (err)
allocated = ext4_ext_get_actual_len(&newex); allocated = ext4_ext_get_actual_len(&newex);
else else
allocated = max_blocks; allocated = map->m_len;
/* allocate new block */ /* allocate new block */
ar.inode = inode; ar.inode = inode;
ar.goal = ext4_ext_find_goal(inode, path, iblock); ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
ar.logical = iblock; ar.logical = map->m_lblk;
ar.len = allocated; ar.len = allocated;
if (S_ISREG(inode->i_mode)) if (S_ISREG(inode->i_mode))
ar.flags = EXT4_MB_HINT_DATA; ar.flags = EXT4_MB_HINT_DATA;
...@@ -3470,21 +3498,33 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3470,21 +3498,33 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
EXT4_STATE_DIO_UNWRITTEN); EXT4_STATE_DIO_UNWRITTEN);
} }
if (ext4_should_dioread_nolock(inode)) if (ext4_should_dioread_nolock(inode))
set_buffer_uninit(bh_result); map->m_flags |= EXT4_MAP_UNINIT;
} }
if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) {
if (unlikely(!eh->eh_entries)) { if (unlikely(!eh->eh_entries)) {
EXT4_ERROR_INODE(inode, EXT4_ERROR_INODE(inode,
"eh->eh_entries == 0 ee_block %d", "eh->eh_entries == 0 and "
ex->ee_block); "EOFBLOCKS_FL set");
err = -EIO; err = -EIO;
goto out2; goto out2;
} }
last_ex = EXT_LAST_EXTENT(eh); last_ex = EXT_LAST_EXTENT(eh);
if (iblock + ar.len > le32_to_cpu(last_ex->ee_block) /*
+ ext4_ext_get_actual_len(last_ex)) * If the current leaf block was reached by looking at
EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; * the last index block all the way down the tree, and
* we are extending the inode beyond the last extent
* in the current leaf block, then clear the
* EOFBLOCKS_FL flag.
*/
for (i = depth-1; i >= 0; i--) {
if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
break;
}
if ((i < 0) &&
(map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) +
ext4_ext_get_actual_len(last_ex)))
ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
} }
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err) { if (err) {
...@@ -3500,9 +3540,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3500,9 +3540,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* previous routine could use block we allocated */ /* previous routine could use block we allocated */
newblock = ext_pblock(&newex); newblock = ext_pblock(&newex);
allocated = ext4_ext_get_actual_len(&newex); allocated = ext4_ext_get_actual_len(&newex);
if (allocated > max_blocks) if (allocated > map->m_len)
allocated = max_blocks; allocated = map->m_len;
set_buffer_new(bh_result); map->m_flags |= EXT4_MAP_NEW;
/* /*
* Update reserved blocks/metadata blocks after successful * Update reserved blocks/metadata blocks after successful
...@@ -3516,18 +3556,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3516,18 +3556,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* when it is _not_ an uninitialized extent. * when it is _not_ an uninitialized extent.
*/ */
if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
ext4_ext_put_in_cache(inode, iblock, allocated, newblock, ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock,
EXT4_EXT_CACHE_EXTENT); EXT4_EXT_CACHE_EXTENT);
ext4_update_inode_fsync_trans(handle, inode, 1); ext4_update_inode_fsync_trans(handle, inode, 1);
} else } else
ext4_update_inode_fsync_trans(handle, inode, 0); ext4_update_inode_fsync_trans(handle, inode, 0);
out: out:
if (allocated > max_blocks) if (allocated > map->m_len)
allocated = max_blocks; allocated = map->m_len;
ext4_ext_show_leaf(inode, path); ext4_ext_show_leaf(inode, path);
set_buffer_mapped(bh_result); map->m_flags |= EXT4_MAP_MAPPED;
bh_result->b_bdev = inode->i_sb->s_bdev; map->m_pblk = newblock;
bh_result->b_blocknr = newblock; map->m_len = allocated;
out2: out2:
if (path) { if (path) {
ext4_ext_drop_refs(path); ext4_ext_drop_refs(path);
...@@ -3625,7 +3665,7 @@ static void ext4_falloc_update_inode(struct inode *inode, ...@@ -3625,7 +3665,7 @@ static void ext4_falloc_update_inode(struct inode *inode,
* can proceed even if the new size is the same as i_size. * can proceed even if the new size is the same as i_size.
*/ */
if (new_size > i_size_read(inode)) if (new_size > i_size_read(inode))
EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL; ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
} }
} }
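
Note on the EOFBLOCKS conversion above: EXT4_INODE_EOFBLOCKS marks an inode that has blocks allocated beyond i_size, which is the state created when preallocation is asked to keep the file size. A minimal userspace sketch of the triggering call (illustrative only, not part of the patch; the helper name is hypothetical):

#define _GNU_SOURCE
#include <fcntl.h>

/* Preallocate 16 MiB past EOF without moving i_size; ext4 then tracks
 * the tail blocks via the EOFBLOCKS inode flag handled in the hunks above. */
static int preallocate_keep_size(int fd)
{
	return fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 16 << 20);
}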
@@ -3640,55 +3680,57 @@ static void ext4_falloc_update_inode(struct inode *inode,
 long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 {
 	handle_t *handle;
-	ext4_lblk_t block;
 	loff_t new_size;
 	unsigned int max_blocks;
 	int ret = 0;
 	int ret2 = 0;
 	int retries = 0;
-	struct buffer_head map_bh;
+	struct ext4_map_blocks map;
 	unsigned int credits, blkbits = inode->i_blkbits;
 
 	/*
 	 * currently supporting (pre)allocate mode for extent-based
 	 * files _only_
 	 */
-	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return -EOPNOTSUPP;
 
 	/* preallocation to directories is currently not supported */
 	if (S_ISDIR(inode->i_mode))
 		return -ENODEV;
 
-	block = offset >> blkbits;
+	map.m_lblk = offset >> blkbits;
 	/*
 	 * We can't just convert len to max_blocks because
 	 * If blocksize = 4096 offset = 3072 and len = 2048
 	 */
 	max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
-		- block;
+		- map.m_lblk;
 	/*
 	 * credits to insert 1 extent into extent tree
 	 */
 	credits = ext4_chunk_trans_blocks(inode, max_blocks);
 	mutex_lock(&inode->i_mutex);
+	ret = inode_newsize_ok(inode, (len + offset));
+	if (ret) {
+		mutex_unlock(&inode->i_mutex);
+		return ret;
+	}
 retry:
 	while (ret >= 0 && ret < max_blocks) {
-		block = block + ret;
-		max_blocks = max_blocks - ret;
+		map.m_lblk = map.m_lblk + ret;
+		map.m_len = max_blocks = max_blocks - ret;
 		handle = ext4_journal_start(inode, credits);
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
 			break;
 		}
-		map_bh.b_state = 0;
-		ret = ext4_get_blocks(handle, inode, block,
-				      max_blocks, &map_bh,
+		ret = ext4_map_blocks(handle, inode, &map,
 				      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
 		if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
 			WARN_ON(ret <= 0);
-			printk(KERN_ERR "%s: ext4_ext_get_blocks "
+			printk(KERN_ERR "%s: ext4_ext_map_blocks "
 			       "returned error inode#%lu, block=%u, "
 			       "max_blocks=%u", __func__,
 			       inode->i_ino, block, max_blocks);
@@ -3697,14 +3739,14 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 			ret2 = ext4_journal_stop(handle);
 			break;
 		}
-		if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
+		if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
 						blkbits) >> blkbits))
 			new_size = offset + len;
 		else
-			new_size = (block + ret) << blkbits;
+			new_size = (map.m_lblk + ret) << blkbits;
 
 		ext4_falloc_update_inode(inode, mode, new_size,
-					 buffer_new(&map_bh));
+					 (map.m_flags & EXT4_MAP_NEW));
 		ext4_mark_inode_dirty(handle, inode);
 		ret2 = ext4_journal_stop(handle);
 		if (ret2)
@@ -3733,42 +3775,39 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 				    ssize_t len)
 {
 	handle_t *handle;
-	ext4_lblk_t block;
 	unsigned int max_blocks;
 	int ret = 0;
 	int ret2 = 0;
-	struct buffer_head map_bh;
+	struct ext4_map_blocks map;
 	unsigned int credits, blkbits = inode->i_blkbits;
 
-	block = offset >> blkbits;
+	map.m_lblk = offset >> blkbits;
 	/*
 	 * We can't just convert len to max_blocks because
 	 * If blocksize = 4096 offset = 3072 and len = 2048
 	 */
-	max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
-		- block;
+	max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
+		      map.m_lblk);
 	/*
 	 * credits to insert 1 extent into extent tree
 	 */
 	credits = ext4_chunk_trans_blocks(inode, max_blocks);
 	while (ret >= 0 && ret < max_blocks) {
-		block = block + ret;
-		max_blocks = max_blocks - ret;
+		map.m_lblk += ret;
+		map.m_len = (max_blocks -= ret);
 		handle = ext4_journal_start(inode, credits);
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
 			break;
 		}
-		map_bh.b_state = 0;
-		ret = ext4_get_blocks(handle, inode, block,
-				      max_blocks, &map_bh,
+		ret = ext4_map_blocks(handle, inode, &map,
 				      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
 		if (ret <= 0) {
 			WARN_ON(ret <= 0);
-			printk(KERN_ERR "%s: ext4_ext_get_blocks "
+			printk(KERN_ERR "%s: ext4_ext_map_blocks "
 			       "returned error inode#%lu, block=%u, "
 			       "max_blocks=%u", __func__,
-			       inode->i_ino, block, max_blocks);
+			       inode->i_ino, map.m_lblk, map.m_len);
 		}
 		ext4_mark_inode_dirty(handle, inode);
 		ret2 = ext4_journal_stop(handle);
@@ -3898,7 +3937,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	int error = 0;
 
 	/* fallback to generic here if not in extents fmt */
-	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return generic_block_fiemap(inode, fieinfo, start, len,
 			ext4_get_block);
...
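
All of the extents.c hunks above write their results through the new mapping descriptor rather than a dummy buffer_head. Reconstructed from its use in these hunks (the authoritative definition lives in ext4.h, added by the "Add new abstraction ext4_map_blocks()" patch in this merge), the abstraction is roughly:

/* Sketch of the new mapping descriptor, as used throughout this series. */
struct ext4_map_blocks {
	ext4_fsblk_t m_pblk;	/* first physical block of the mapping */
	ext4_lblk_t m_lblk;	/* first logical block requested */
	unsigned int m_len;	/* in: blocks requested; out: blocks mapped */
	unsigned int m_flags;	/* EXT4_MAP_* result bits */
};

/* The m_flags bits mirror the buffer_head state bits they replace: */
#define EXT4_MAP_NEW		(1 << BH_New)
#define EXT4_MAP_MAPPED		(1 << BH_Mapped)
#define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
#define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
#define EXT4_MAP_UNINIT		(1 << BH_Uninit)
#define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED | \
				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY | \
				 EXT4_MAP_UNINIT)

Keeping the flag values numerically identical to the buffer_head state bits is what lets _ext4_get_block() further down OR them straight into bh->b_state.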
@@ -66,7 +66,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 	 * is smaller than s_maxbytes, which is for extent-mapped files.
 	 */
-	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
+	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
 		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 		size_t length = iov_length(iov, nr_segs);
...
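
ext4_test_inode_flag() and its companions come from the "Use bitops to read/modify i_flags" patch in this series. A simplified sketch of the helpers, assuming i_flags is an unsigned long (the real header generates them with a macro):

/* Simplified shape of the i_flags helpers; EXT4_INODE_* are bit numbers
 * manipulated atomically, so readers no longer race with flag updates. */
static inline void ext4_set_inode_flag(struct inode *inode, int bit)
{
	set_bit(bit, &EXT4_I(inode)->i_flags);
}

static inline void ext4_clear_inode_flag(struct inode *inode, int bit)
{
	clear_bit(bit, &EXT4_I(inode)->i_flags);
}

static inline int ext4_test_inode_flag(struct inode *inode, int bit)
{
	return test_bit(bit, &EXT4_I(inode)->i_flags);
}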
@@ -34,6 +34,29 @@
 
 #include <trace/events/ext4.h>
 
+/*
+ * If we're not journaling and this is a just-created file, we have to
+ * sync our parent directory (if it was freshly created) since
+ * otherwise it will only be written by writeback, leaving a huge
+ * window during which a crash may lose the file.  This may apply for
+ * the parent directory's parent as well, and so on recursively, if
+ * they are also freshly created.
+ */
+static void ext4_sync_parent(struct inode *inode)
+{
+	struct dentry *dentry = NULL;
+
+	while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
+		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
+		dentry = list_entry(inode->i_dentry.next,
+				    struct dentry, d_alias);
+		if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
+			break;
+		inode = dentry->d_parent->d_inode;
+		sync_mapping_buffers(inode->i_mapping);
+	}
+}
+
 /*
  * akpm: A new design for ext4_sync_file().
  *
@@ -66,9 +89,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 	ret = flush_completed_IO(inode);
 	if (ret < 0)
 		return ret;
-	if (!journal)
-		return simple_fsync(file, dentry, datasync);
+	if (!journal) {
+		ret = simple_fsync(file, dentry, datasync);
+		if (!ret && !list_empty(&inode->i_dentry))
+			ext4_sync_parent(inode);
+		return ret;
+	}
 
 	/*
 	 * data=writeback,ordered:
@@ -102,7 +129,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 		    (journal->j_flags & JBD2_BARRIER))
 			blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
 					NULL, BLKDEV_IFL_WAIT);
-		jbd2_log_wait_commit(journal, commit_tid);
+		ret = jbd2_log_wait_commit(journal, commit_tid);
 	} else if (journal->j_flags & JBD2_BARRIER)
 		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
 			BLKDEV_IFL_WAIT);
...
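
ext4_sync_parent() only walks upward while EXT4_STATE_NEWENTRY is set; that state is planted on the parent directory when a new entry is added in no-journal mode. A condensed, hypothetical rendering of the counterpart change (the real hook sits in fs/ext4/namei.c within this same patch):

/* Hypothetical condensed form of the dirent-side hook: after a new
 * entry is linked with no journal present, tag the directory so a later
 * fsync of the child knows the parent's dirent block may still be dirty. */
static int ext4_add_entry_tag_parent(struct inode *dir, int retval)
{
	if (!retval && !EXT4_SB(dir->i_sb)->s_journal)
		ext4_set_inode_state(dir, EXT4_STATE_NEWENTRY);
	return retval;
}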
@@ -240,56 +240,49 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	if (fatal)
 		goto error_return;
 
-	/* Ok, now we can actually update the inode bitmaps.. */
-	cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
-					bit, bitmap_bh->b_data);
-	if (!cleared)
-		ext4_error(sb, "bit already cleared for inode %lu", ino);
-	else {
-		gdp = ext4_get_group_desc(sb, block_group, &bh2);
-
+	fatal = -ESRCH;
+	gdp = ext4_get_group_desc(sb, block_group, &bh2);
+	if (gdp) {
 		BUFFER_TRACE(bh2, "get_write_access");
 		fatal = ext4_journal_get_write_access(handle, bh2);
-		if (fatal) goto error_return;
-
-		if (gdp) {
-			ext4_lock_group(sb, block_group);
-			count = ext4_free_inodes_count(sb, gdp) + 1;
-			ext4_free_inodes_set(sb, gdp, count);
-			if (is_directory) {
-				count = ext4_used_dirs_count(sb, gdp) - 1;
-				ext4_used_dirs_set(sb, gdp, count);
-				if (sbi->s_log_groups_per_flex) {
-					ext4_group_t f;
-
-					f = ext4_flex_group(sbi, block_group);
-					atomic_dec(&sbi->s_flex_groups[f].used_dirs);
-				}
-			}
-			gdp->bg_checksum = ext4_group_desc_csum(sbi,
-							block_group, gdp);
-			ext4_unlock_group(sb, block_group);
-			percpu_counter_inc(&sbi->s_freeinodes_counter);
-			if (is_directory)
-				percpu_counter_dec(&sbi->s_dirs_counter);
-
-			if (sbi->s_log_groups_per_flex) {
-				ext4_group_t f;
-
-				f = ext4_flex_group(sbi, block_group);
-				atomic_inc(&sbi->s_flex_groups[f].free_inodes);
-			}
-		}
-		BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
-		err = ext4_handle_dirty_metadata(handle, NULL, bh2);
-		if (!fatal) fatal = err;
 	}
-	BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
-	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
-	if (!fatal)
-		fatal = err;
-	sb->s_dirt = 1;
+	ext4_lock_group(sb, block_group);
+	cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
+	if (fatal || !cleared) {
+		ext4_unlock_group(sb, block_group);
+		goto out;
+	}
+
+	count = ext4_free_inodes_count(sb, gdp) + 1;
+	ext4_free_inodes_set(sb, gdp, count);
+	if (is_directory) {
+		count = ext4_used_dirs_count(sb, gdp) - 1;
+		ext4_used_dirs_set(sb, gdp, count);
+		percpu_counter_dec(&sbi->s_dirs_counter);
+	}
+	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
+	ext4_unlock_group(sb, block_group);
+
+	percpu_counter_inc(&sbi->s_freeinodes_counter);
+	if (sbi->s_log_groups_per_flex) {
+		ext4_group_t f = ext4_flex_group(sbi, block_group);
+
+		atomic_inc(&sbi->s_flex_groups[f].free_inodes);
+		if (is_directory)
+			atomic_dec(&sbi->s_flex_groups[f].used_dirs);
+	}
+	BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
+	fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);
+out:
+	if (cleared) {
+		BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
+		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+		if (!fatal)
+			fatal = err;
+		sb->s_dirt = 1;
+	} else
+		ext4_error(sb, "bit already cleared for inode %lu", ino);
+
 error_return:
 	brelse(bitmap_bh);
 	ext4_std_error(sb, fatal);
@@ -499,7 +492,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 	if (S_ISDIR(mode) &&
 	    ((parent == sb->s_root->d_inode) ||
-	     (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
+	     (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
 		int best_ndir = inodes_per_group;
 		int ret = -1;
 
@@ -1041,7 +1034,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
 		/* set extent flag only for directory, file and normal symlink*/
 		if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
-			EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
+			ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
 			ext4_ext_tree_init(handle, inode);
 		}
 	}
...
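
The ext4_free_inode() rewrite above also swaps ext4_clear_bit_atomic() for plain ext4_clear_bit(). The non-atomic bitop is sufficient because the clear now happens inside the group spinlock; a minimal sketch of that invariant, assuming the usual ext4.h definitions (helper name hypothetical):

/* Sketch only: the per-group lock provides the exclusion that the
 * atomic bitop used to buy, and lets a journal failure back out
 * before the freed inode is published. */
static int free_inode_bit_example(struct super_block *sb,
				  ext4_group_t block_group,
				  unsigned long bit,
				  struct buffer_head *bitmap_bh)
{
	int cleared;

	ext4_lock_group(sb, block_group);		  /* excludes all writers */
	cleared = ext4_clear_bit(bit, bitmap_bh->b_data); /* plain, not atomic */
	ext4_unlock_group(sb, block_group);

	return cleared ? 0 : -ESRCH;	/* !cleared => bit was already free */
}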
@@ -149,7 +149,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
 	int ret;
 
 	/*
-	 * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this
+	 * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
 	 * moment, get_block can be called only for blocks inside i_size since
 	 * page cache has been already dropped and writes are blocked by
 	 * i_mutex. So we can safely drop the i_data_sem here.
@@ -348,9 +348,8 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
 		if (blk &&
 		    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
 						    blk, 1))) {
-			__ext4_error(inode->i_sb, function,
-				     "invalid block reference %u "
-				     "in inode #%lu", blk, inode->i_ino);
+			ext4_error_inode(function, inode,
+					 "invalid block reference %u", blk);
 			return -EIO;
 		}
 	}
@@ -785,7 +784,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 	/* Allocation failed, free what we already allocated */
 	ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
 	for (i = 1; i <= n ; i++) {
-		/* 
+		/*
 		 * branch[i].bh is newly allocated, so there is no
 		 * need to revoke the block, which is why we don't
 		 * need to set EXT4_FREE_BLOCKS_METADATA.
@@ -875,7 +874,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 err_out:
 	for (i = 1; i <= num; i++) {
-		/* 
+		/*
 		 * branch[i].bh is newly allocated, so there is no
 		 * need to revoke the block, which is why we don't
 		 * need to set EXT4_FREE_BLOCKS_METADATA.
@@ -890,9 +889,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 }
 
 /*
- * The ext4_ind_get_blocks() function handles non-extents inodes
+ * The ext4_ind_map_blocks() function handles non-extents inodes
  * (i.e., using the traditional indirect/double-indirect i_blocks
- * scheme) for ext4_get_blocks().
+ * scheme) for ext4_map_blocks().
  *
  * Allocation strategy is simple: if we have to allocate something, we will
  * have to go the whole way to leaf. So let's do it before attaching anything
@@ -917,9 +916,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
  * blocks.
  */
-static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
-			       ext4_lblk_t iblock, unsigned int maxblocks,
-			       struct buffer_head *bh_result,
+static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+			       struct ext4_map_blocks *map,
 			       int flags)
 {
 	int err = -EIO;
@@ -933,9 +931,9 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	int count = 0;
 	ext4_fsblk_t first_block = 0;
 
-	J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
+	J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
 	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
-	depth = ext4_block_to_path(inode, iblock, offsets,
+	depth = ext4_block_to_path(inode, map->m_lblk, offsets,
 				   &blocks_to_boundary);
 
 	if (depth == 0)
@@ -946,10 +944,9 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
 		first_block = le32_to_cpu(chain[depth - 1].key);
-		clear_buffer_new(bh_result);
 		count++;
 		/*map more blocks*/
-		while (count < maxblocks && count <= blocks_to_boundary) {
+		while (count < map->m_len && count <= blocks_to_boundary) {
 			ext4_fsblk_t blk;
 
 			blk = le32_to_cpu(*(chain[depth-1].p + count));
@@ -969,7 +966,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	/*
 	 * Okay, we need to do block allocation.
 	 */
-	goal = ext4_find_goal(inode, iblock, partial);
+	goal = ext4_find_goal(inode, map->m_lblk, partial);
 
 	/* the number of blocks need to allocate for [d,t]indirect blocks */
 	indirect_blks = (chain + depth) - partial - 1;
@@ -979,11 +976,11 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	 * direct blocks to allocate for this branch.
 	 */
 	count = ext4_blks_to_allocate(partial, indirect_blks,
-					maxblocks, blocks_to_boundary);
+				      map->m_len, blocks_to_boundary);
 	/*
 	 * Block out ext4_truncate while we alter the tree
 	 */
-	err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
+	err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
 				&count, goal,
 				offsets + (partial - chain), partial);
@@ -995,18 +992,20 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	 * may need to return -EAGAIN upwards in the worst case. --sct
 	 */
 	if (!err)
-		err = ext4_splice_branch(handle, inode, iblock,
+		err = ext4_splice_branch(handle, inode, map->m_lblk,
 					 partial, indirect_blks, count);
 	if (err)
 		goto cleanup;
 
-	set_buffer_new(bh_result);
+	map->m_flags |= EXT4_MAP_NEW;
 
 	ext4_update_inode_fsync_trans(handle, inode, 1);
 got_it:
-	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+	map->m_flags |= EXT4_MAP_MAPPED;
+	map->m_pblk = le32_to_cpu(chain[depth-1].key);
+	map->m_len = count;
 	if (count > blocks_to_boundary)
-		set_buffer_boundary(bh_result);
+		map->m_flags |= EXT4_MAP_BOUNDARY;
 	err = count;
 	/* Clean up and exit */
 	partial = chain + depth - 1;	/* the whole chain */
@@ -1016,7 +1015,6 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 		brelse(partial->bh);
 		partial--;
 	}
-	BUFFER_TRACE(bh_result, "returned");
 out:
 	return err;
 }
@@ -1061,7 +1059,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode,
  */
 static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
 {
-	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		return ext4_ext_calc_metadata_amount(inode, lblock);
 
 	return ext4_indirect_calc_metadata_amount(inode, lblock);
@@ -1076,7 +1074,6 @@ void ext4_da_update_reserve_space(struct inode *inode,
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	int mdb_free = 0, allocated_meta_blocks = 0;
 
 	spin_lock(&ei->i_block_reservation_lock);
 	trace_ext4_da_update_reserve_space(inode, used);
@@ -1091,11 +1088,10 @@ void ext4_da_update_reserve_space(struct inode *inode,
 
 	/* Update per-inode reservations */
 	ei->i_reserved_data_blocks -= used;
-	used += ei->i_allocated_meta_blocks;
 	ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
-	allocated_meta_blocks = ei->i_allocated_meta_blocks;
+	percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+			   used + ei->i_allocated_meta_blocks);
 	ei->i_allocated_meta_blocks = 0;
-	percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
 
 	if (ei->i_reserved_data_blocks == 0) {
 		/*
@@ -1103,30 +1099,23 @@ void ext4_da_update_reserve_space(struct inode *inode,
 		 * only when we have written all of the delayed
 		 * allocation blocks.
 		 */
-		mdb_free = ei->i_reserved_meta_blocks;
+		percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+				   ei->i_reserved_meta_blocks);
 		ei->i_reserved_meta_blocks = 0;
 		ei->i_da_metadata_calc_len = 0;
-		percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
 	}
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	/* Update quota subsystem */
-	if (quota_claim) {
+	/* Update quota subsystem for data blocks */
+	if (quota_claim)
 		dquot_claim_block(inode, used);
-		if (mdb_free)
-			dquot_release_reservation_block(inode, mdb_free);
-	} else {
+	else {
 		/*
 		 * We did fallocate with an offset that is already delayed
 		 * allocated. So on delayed allocated writeback we should
-		 * not update the quota for allocated blocks. But then
-		 * converting an fallocate region to initialized region would
-		 * have caused a metadata allocation. So claim quota for
-		 * that
+		 * not re-claim the quota for fallocated blocks.
 		 */
-		if (allocated_meta_blocks)
-			dquot_claim_block(inode, allocated_meta_blocks);
-		dquot_release_reservation_block(inode, mdb_free + used);
+		dquot_release_reservation_block(inode, used);
 	}
 
 	/*
@@ -1139,15 +1128,15 @@ void ext4_da_update_reserve_space(struct inode *inode,
 		ext4_discard_preallocations(inode);
 }
 
-static int check_block_validity(struct inode *inode, const char *msg,
-				sector_t logical, sector_t phys, int len)
+static int check_block_validity(struct inode *inode, const char *func,
+				struct ext4_map_blocks *map)
 {
-	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
-		__ext4_error(inode->i_sb, msg,
-			     "inode #%lu logical block %llu mapped to %llu "
-			     "(size %d)", inode->i_ino,
-			     (unsigned long long) logical,
-			     (unsigned long long) phys, len);
+	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
+				   map->m_len)) {
+		ext4_error_inode(func, inode,
+			   "lblock %lu mapped to illegal pblock %llu "
+			   "(length %d)", (unsigned long) map->m_lblk,
+			   map->m_pblk, map->m_len);
 		return -EIO;
 	}
 	return 0;
@@ -1212,15 +1201,15 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
 }
 
 /*
- * The ext4_get_blocks() function tries to look up the requested blocks,
+ * The ext4_map_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
  *
  * Otherwise it takes the write lock of the i_data_sem and allocate blocks
 * and store the allocated blocks in the result buffer head and mark it
 * mapped.
 *
- * If file type is extents based, it will call ext4_ext_get_blocks(),
- * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping
+ * If file type is extents based, it will call ext4_ext_map_blocks(),
+ * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping
 * based files
 *
 * On success, it returns the number of blocks being mapped or allocate.
@@ -1233,35 +1222,29 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
 *
 * It returns the error in case of allocation failure.
 */
-int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
-		    unsigned int max_blocks, struct buffer_head *bh,
-		    int flags)
+int ext4_map_blocks(handle_t *handle, struct inode *inode,
+		    struct ext4_map_blocks *map, int flags)
 {
 	int retval;
 
-	clear_buffer_mapped(bh);
-	clear_buffer_unwritten(bh);
-
-	ext_debug("ext4_get_blocks(): inode %lu, flag %d, max_blocks %u,"
-		  "logical block %lu\n", inode->i_ino, flags, max_blocks,
-		  (unsigned long)block);
+	map->m_flags = 0;
+	ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
+		  "logical block %lu\n", inode->i_ino, flags, map->m_len,
+		  (unsigned long) map->m_lblk);
 	/*
 	 * Try to see if we can get the block without requesting a new
 	 * file system block.
 	 */
 	down_read((&EXT4_I(inode)->i_data_sem));
-	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-		retval = ext4_ext_get_blocks(handle, inode, block, max_blocks,
-				bh, 0);
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+		retval = ext4_ext_map_blocks(handle, inode, map, 0);
 	} else {
-		retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
-				bh, 0);
+		retval = ext4_ind_map_blocks(handle, inode, map, 0);
 	}
 	up_read((&EXT4_I(inode)->i_data_sem));
 
-	if (retval > 0 && buffer_mapped(bh)) {
-		int ret = check_block_validity(inode, "file system corruption",
-					       block, bh->b_blocknr, retval);
+	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+		int ret = check_block_validity(inode, __func__, map);
 		if (ret != 0)
 			return ret;
 	}
@@ -1277,7 +1260,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 	 * ext4_ext_get_block() returns th create = 0
 	 * with buffer head unmapped.
 	 */
-	if (retval > 0 && buffer_mapped(bh))
+	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
 		return retval;
 
 	/*
@@ -1290,7 +1273,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 	 * of BH_Unwritten and BH_Mapped flags being simultaneously
 	 * set on the buffer_head.
 	 */
-	clear_buffer_unwritten(bh);
+	map->m_flags &= ~EXT4_MAP_UNWRITTEN;
 
 	/*
 	 * New blocks allocate and/or writing to uninitialized extent
@@ -1312,14 +1295,12 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 	 * We need to check for EXT4 here because migrate
 	 * could have changed the inode type in between
 	 */
-	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-		retval = ext4_ext_get_blocks(handle, inode, block, max_blocks,
-					      bh, flags);
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+		retval = ext4_ext_map_blocks(handle, inode, map, flags);
 	} else {
-		retval = ext4_ind_get_blocks(handle, inode, block,
-					     max_blocks, bh, flags);
+		retval = ext4_ind_map_blocks(handle, inode, map, flags);
 
-		if (retval > 0 && buffer_new(bh)) {
+		if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
 			/*
 			 * We allocated new blocks which will result in
 			 * i_data's format changing.  Force the migrate
@@ -1342,10 +1323,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		EXT4_I(inode)->i_delalloc_reserved_flag = 0;
 	up_write((&EXT4_I(inode)->i_data_sem));
 
-	if (retval > 0 && buffer_mapped(bh)) {
-		int ret = check_block_validity(inode, "file system "
-					       "corruption after allocation",
-					       block, bh->b_blocknr, retval);
+	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+		int ret = check_block_validity(inode,
+					       "ext4_map_blocks_after_alloc",
+					       map);
 		if (ret != 0)
 			return ret;
 	}
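
For orientation, a hypothetical caller of the new entry point: fill in m_lblk/m_len, pass the usual EXT4_GET_BLOCKS_* flags, and read the mapping back from m_pblk/m_flags; a positive return value is the number of blocks mapped.

/* Hypothetical caller sketch; the helper name is illustrative. */
static int map_range_example(handle_t *handle, struct inode *inode,
			     ext4_lblk_t lblk, unsigned int len)
{
	struct ext4_map_blocks map;
	int ret;

	map.m_lblk = lblk;
	map.m_len = len;
	ret = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE);
	if (ret <= 0)
		return ret;	/* <0: error; 0: hole (only without CREATE) */
	/* blocks lblk .. lblk+ret-1 are now backed by map.m_pblk .. */
	return (map.m_flags & EXT4_MAP_NEW) ? 1 : 0;
}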
@@ -1355,109 +1336,109 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 /* Maximum number of blocks we map for direct IO at once. */
 #define DIO_MAX_BLOCKS 4096
 
-int ext4_get_block(struct inode *inode, sector_t iblock,
-		   struct buffer_head *bh_result, int create)
+static int _ext4_get_block(struct inode *inode, sector_t iblock,
+			   struct buffer_head *bh, int flags)
 {
 	handle_t *handle = ext4_journal_current_handle();
+	struct ext4_map_blocks map;
 	int ret = 0, started = 0;
-	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
 	int dio_credits;
 
-	if (create && !handle) {
+	map.m_lblk = iblock;
+	map.m_len = bh->b_size >> inode->i_blkbits;
+
+	if (flags && !handle) {
 		/* Direct IO write... */
-		if (max_blocks > DIO_MAX_BLOCKS)
-			max_blocks = DIO_MAX_BLOCKS;
-		dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
+		if (map.m_len > DIO_MAX_BLOCKS)
+			map.m_len = DIO_MAX_BLOCKS;
+		dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
 		handle = ext4_journal_start(inode, dio_credits);
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
-			goto out;
+			return ret;
 		}
 		started = 1;
 	}
 
-	ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
-			      create ? EXT4_GET_BLOCKS_CREATE : 0);
+	ret = ext4_map_blocks(handle, inode, &map, flags);
 	if (ret > 0) {
-		bh_result->b_size = (ret << inode->i_blkbits);
+		map_bh(bh, inode->i_sb, map.m_pblk);
+		bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
 		ret = 0;
 	}
 	if (started)
 		ext4_journal_stop(handle);
-out:
 	return ret;
 }
 
+int ext4_get_block(struct inode *inode, sector_t iblock,
+		   struct buffer_head *bh, int create)
+{
+	return _ext4_get_block(inode, iblock, bh,
+			       create ? EXT4_GET_BLOCKS_CREATE : 0);
+}
+
 /*
  * `handle' can be NULL if create is zero
  */
 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 				ext4_lblk_t block, int create, int *errp)
 {
-	struct buffer_head dummy;
+	struct ext4_map_blocks map;
+	struct buffer_head *bh;
 	int fatal = 0, err;
-	int flags = 0;
 
 	J_ASSERT(handle != NULL || create == 0);
 
-	dummy.b_state = 0;
-	dummy.b_blocknr = -1000;
-	buffer_trace_init(&dummy.b_history);
-	if (create)
-		flags |= EXT4_GET_BLOCKS_CREATE;
-	err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
-	/*
-	 * ext4_get_blocks() returns number of blocks mapped. 0 in
-	 * case of a HOLE.
-	 */
-	if (err > 0) {
-		if (err > 1)
-			WARN_ON(1);
-		err = 0;
-	}
-	*errp = err;
-	if (!err && buffer_mapped(&dummy)) {
-		struct buffer_head *bh;
-		bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
-		if (!bh) {
-			*errp = -EIO;
-			goto err;
-		}
-		if (buffer_new(&dummy)) {
-			J_ASSERT(create != 0);
-			J_ASSERT(handle != NULL);
+	map.m_lblk = block;
+	map.m_len = 1;
+	err = ext4_map_blocks(handle, inode, &map,
+			      create ? EXT4_GET_BLOCKS_CREATE : 0);
+
+	if (err < 0)
+		*errp = err;
+	if (err <= 0)
+		return NULL;
+	*errp = 0;
+
+	bh = sb_getblk(inode->i_sb, map.m_pblk);
+	if (!bh) {
+		*errp = -EIO;
+		return NULL;
+	}
+	if (map.m_flags & EXT4_MAP_NEW) {
+		J_ASSERT(create != 0);
+		J_ASSERT(handle != NULL);
 
 		/*
 		 * Now that we do not always journal data, we should
 		 * keep in mind whether this should always journal the
 		 * new buffer as metadata.  For now, regular file
 		 * writes use ext4_get_block instead, so it's not a
 		 * problem.
 		 */
 		lock_buffer(bh);
 		BUFFER_TRACE(bh, "call get_create_access");
 		fatal = ext4_journal_get_create_access(handle, bh);
 		if (!fatal && !buffer_uptodate(bh)) {
 			memset(bh->b_data, 0, inode->i_sb->s_blocksize);
 			set_buffer_uptodate(bh);
 		}
-			unlock_buffer(bh);
-			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-			err = ext4_handle_dirty_metadata(handle, inode, bh);
-			if (!fatal)
-				fatal = err;
-		} else {
-			BUFFER_TRACE(bh, "not a new buffer");
-		}
-		if (fatal) {
-			*errp = fatal;
-			brelse(bh);
-			bh = NULL;
-		}
-		return bh;
+		unlock_buffer(bh);
+		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+		err = ext4_handle_dirty_metadata(handle, inode, bh);
+		if (!fatal)
+			fatal = err;
+	} else {
+		BUFFER_TRACE(bh, "not a new buffer");
+	}
+	if (fatal) {
+		*errp = fatal;
+		brelse(bh);
+		bh = NULL;
 	}
-err:
-	return NULL;
+	return bh;
 }
 
 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
@@ -1860,7 +1841,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
 	int retries = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	unsigned long md_needed, md_reserved;
+	unsigned long md_needed;
 	int ret;
 
 	/*
@@ -1870,22 +1851,24 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
 	 */
 repeat:
 	spin_lock(&ei->i_block_reservation_lock);
-	md_reserved = ei->i_reserved_meta_blocks;
 	md_needed = ext4_calc_metadata_amount(inode, lblock);
 	trace_ext4_da_reserve_space(inode, md_needed);
 	spin_unlock(&ei->i_block_reservation_lock);
 
 	/*
-	 * Make quota reservation here to prevent quota overflow
-	 * later. Real quota accounting is done at pages writeout
-	 * time.
+	 * We will charge metadata quota at writeout time; this saves
+	 * us from metadata over-estimation, though we may go over by
+	 * a small amount in the end.  Here we just reserve for data.
 	 */
-	ret = dquot_reserve_block(inode, md_needed + 1);
+	ret = dquot_reserve_block(inode, 1);
 	if (ret)
 		return ret;
-
+	/*
+	 * We do still charge estimated metadata to the sb though;
+	 * we cannot afford to run out of free blocks.
+	 */
 	if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
-		dquot_release_reservation_block(inode, md_needed + 1);
+		dquot_release_reservation_block(inode, 1);
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
 			yield();
 			goto repeat;
@@ -1910,6 +1893,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 
 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 
+	trace_ext4_da_release_space(inode, to_free);
 	if (unlikely(to_free > ei->i_reserved_data_blocks)) {
 		/*
 		 * if there aren't enough reserved blocks, then the
@@ -1932,12 +1916,13 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 		 * only when we have written all of the delayed
 		 * allocation blocks.
 		 */
-		to_free += ei->i_reserved_meta_blocks;
+		percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+				   ei->i_reserved_meta_blocks);
 		ei->i_reserved_meta_blocks = 0;
 		ei->i_da_metadata_calc_len = 0;
 	}
 
-	/* update fs dirty blocks counter */
+	/* update fs dirty data blocks counter */
 	percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
 
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
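
Taken together, the two hunks above simplify delalloc accounting: quota is reserved for exactly one data block per delayed block, the estimated metadata is charged only against the superblock's dirty-block counter, and quota is settled at writeout. A sketch of that lifecycle under those assumptions (helper name hypothetical):

/* Reserve-then-settle lifecycle for one delayed data block (sketch). */
static int da_quota_lifecycle_example(struct inode *inode)
{
	int ret = dquot_reserve_block(inode, 1);	/* data block only */
	if (ret)
		return ret;				/* over quota */
	/*
	 * Later, exactly one of:
	 *  - writeout: ext4_da_update_reserve_space() ->
	 *              dquot_claim_block(inode, used);
	 *  - discard:  ext4_da_release_space() ->
	 *              dquot_release_reservation_block(inode, to_free);
	 * Metadata is charged to quota only when it is really allocated.
	 */
	return 0;
}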
@@ -2042,28 +2027,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
 /*
  * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
  *
- * @mpd->inode - inode to walk through
- * @exbh->b_blocknr - first block on a disk
- * @exbh->b_size - amount of space in bytes
- * @logical - first logical block to start assignment with
- *
  * the function goes through all passed space and put actual disk
  * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
  */
-static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
-				 struct buffer_head *exbh)
+static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
+				 struct ext4_map_blocks *map)
 {
 	struct inode *inode = mpd->inode;
 	struct address_space *mapping = inode->i_mapping;
-	int blocks = exbh->b_size >> inode->i_blkbits;
-	sector_t pblock = exbh->b_blocknr, cur_logical;
+	int blocks = map->m_len;
+	sector_t pblock = map->m_pblk, cur_logical;
 	struct buffer_head *head, *bh;
 	pgoff_t index, end;
 	struct pagevec pvec;
 	int nr_pages, i;
 
-	index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
 	cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
 
 	pagevec_init(&pvec, 0);
@@ -2090,17 +2070,16 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
 
 			/* skip blocks out of the range */
 			do {
-				if (cur_logical >= logical)
+				if (cur_logical >= map->m_lblk)
 					break;
 				cur_logical++;
 			} while ((bh = bh->b_this_page) != head);
 
 			do {
-				if (cur_logical >= logical + blocks)
+				if (cur_logical >= map->m_lblk + blocks)
 					break;
 
-				if (buffer_delay(bh) ||
-				    buffer_unwritten(bh)) {
+				if (buffer_delay(bh) || buffer_unwritten(bh)) {
 
 					BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
@@ -2119,7 +2098,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
 				} else if (buffer_mapped(bh))
 					BUG_ON(bh->b_blocknr != pblock);
 
-				if (buffer_uninit(exbh))
+				if (map->m_flags & EXT4_MAP_UNINIT)
 					set_buffer_uninit(bh);
 				cur_logical++;
 				pblock++;
@@ -2130,21 +2109,6 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
 	}
 }
 
-/*
- * __unmap_underlying_blocks - just a helper function to unmap
- * set of blocks described by @bh
- */
-static inline void __unmap_underlying_blocks(struct inode *inode,
-					     struct buffer_head *bh)
-{
-	struct block_device *bdev = inode->i_sb->s_bdev;
-	int blocks, i;
-
-	blocks = bh->b_size >> inode->i_blkbits;
-	for (i = 0; i < blocks; i++)
-		unmap_underlying_metadata(bdev, bh->b_blocknr + i);
-}
-
 static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
 					sector_t logical, long blk_cnt)
 {
@@ -2206,7 +2170,7 @@ static void ext4_print_free_blocks(struct inode *inode)
 static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
 	int err, blks, get_blocks_flags;
-	struct buffer_head new;
+	struct ext4_map_blocks map;
 	sector_t next = mpd->b_blocknr;
 	unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
 	loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
@@ -2247,15 +2211,15 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	 * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
 	 * variables are updated after the blocks have been allocated.
 	 */
-	new.b_state = 0;
+	map.m_lblk = next;
+	map.m_len = max_blocks;
 	get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
 	if (ext4_should_dioread_nolock(mpd->inode))
 		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
 	if (mpd->b_state & (1 << BH_Delay))
 		get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
 
-	blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
-			       &new, get_blocks_flags);
+	blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
 	if (blks < 0) {
 		err = blks;
 		/*
@@ -2282,7 +2246,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 		ext4_msg(mpd->inode->i_sb, KERN_CRIT,
 			 "delayed block allocation failed for inode %lu at "
 			 "logical offset %llu with max blocks %zd with "
-			 "error %d\n", mpd->inode->i_ino,
+			 "error %d", mpd->inode->i_ino,
 			 (unsigned long long) next,
 			 mpd->b_size >> mpd->inode->i_blkbits, err);
 		printk(KERN_CRIT "This should not happen!! "
@@ -2297,10 +2261,13 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 	}
 	BUG_ON(blks == 0);
 
-	new.b_size = (blks << mpd->inode->i_blkbits);
+	if (map.m_flags & EXT4_MAP_NEW) {
+		struct block_device *bdev = mpd->inode->i_sb->s_bdev;
+		int i;
 
-	if (buffer_new(&new))
-		__unmap_underlying_blocks(mpd->inode, &new);
+		for (i = 0; i < map.m_len; i++)
+			unmap_underlying_metadata(bdev, map.m_pblk + i);
}
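Worth noting about the open-coded loop above: unmap_underlying_metadata() evicts any stale buffer_head aliases that the block device mapping may still hold for the freshly allocated blocks, so old cached contents cannot be flushed over the new data. Inlining the loop is what lets the patch delete the __unmap_underlying_blocks() helper earlier in this file.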
/* /*
* If blocks are delayed marked, we need to * If blocks are delayed marked, we need to
...@@ -2308,7 +2275,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) ...@@ -2308,7 +2275,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
*/ */
if ((mpd->b_state & (1 << BH_Delay)) || if ((mpd->b_state & (1 << BH_Delay)) ||
(mpd->b_state & (1 << BH_Unwritten))) (mpd->b_state & (1 << BH_Unwritten)))
mpage_put_bnr_to_bhs(mpd, next, &new); mpage_put_bnr_to_bhs(mpd, &map);
if (ext4_should_order_data(mpd->inode)) { if (ext4_should_order_data(mpd->inode)) {
err = ext4_jbd2_file_inode(handle, mpd->inode); err = ext4_jbd2_file_inode(handle, mpd->inode);
...@@ -2349,8 +2316,17 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, ...@@ -2349,8 +2316,17 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
sector_t next; sector_t next;
int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
/*
* XXX Don't go larger than mballoc is willing to allocate
* This is a stopgap solution. We eventually need to fold
* mpage_da_submit_io() into this function and then call
* ext4_get_blocks() multiple times in a loop
*/
if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
goto flush_it;
/* check if the reserved journal credits might overflow */ /* check if the reserved journal credits might overflow */
if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) {
if (nrblocks >= EXT4_MAX_TRANS_DATA) { if (nrblocks >= EXT4_MAX_TRANS_DATA) {
/* /*
* With non-extent format we are limited by the journal * With non-extent format we are limited by the journal
...@@ -2423,17 +2399,6 @@ static int __mpage_da_writepage(struct page *page, ...@@ -2423,17 +2399,6 @@ static int __mpage_da_writepage(struct page *page,
struct buffer_head *bh, *head; struct buffer_head *bh, *head;
sector_t logical; sector_t logical;
if (mpd->io_done) {
/*
 * Rest of the pages in the page_vec
 * redirty them and skip them. We will
* try to write them again after
* starting a new transaction
*/
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return MPAGE_DA_EXTENT_TAIL;
}
/* /*
* Can we merge this page to current extent? * Can we merge this page to current extent?
*/ */
...@@ -2528,8 +2493,9 @@ static int __mpage_da_writepage(struct page *page, ...@@ -2528,8 +2493,9 @@ static int __mpage_da_writepage(struct page *page,
* initialized properly. * initialized properly.
*/ */
static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create) struct buffer_head *bh, int create)
{ {
struct ext4_map_blocks map;
int ret = 0; int ret = 0;
sector_t invalid_block = ~((sector_t) 0xffff); sector_t invalid_block = ~((sector_t) 0xffff);
...@@ -2537,16 +2503,22 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, ...@@ -2537,16 +2503,22 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
invalid_block = ~0; invalid_block = ~0;
BUG_ON(create == 0); BUG_ON(create == 0);
BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
map.m_lblk = iblock;
map.m_len = 1;
/* /*
* first, we need to know whether the block is allocated already * first, we need to know whether the block is allocated already
 * preallocated blocks are unmapped but should be treated * preallocated blocks are unmapped but should be treated
* the same as allocated blocks. * the same as allocated blocks.
*/ */
ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0); ret = ext4_map_blocks(NULL, inode, &map, 0);
if ((ret == 0) && !buffer_delay(bh_result)) { if (ret < 0)
/* the block isn't (pre)allocated yet, let's reserve space */ return ret;
if (ret == 0) {
if (buffer_delay(bh))
return 0; /* Not sure this could or should happen */
/* /*
* XXX: __block_prepare_write() unmaps passed block, * XXX: __block_prepare_write() unmaps passed block,
* is it OK? * is it OK?
...@@ -2556,26 +2528,26 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, ...@@ -2556,26 +2528,26 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
/* not enough space to reserve */ /* not enough space to reserve */
return ret; return ret;
map_bh(bh_result, inode->i_sb, invalid_block); map_bh(bh, inode->i_sb, invalid_block);
set_buffer_new(bh_result); set_buffer_new(bh);
set_buffer_delay(bh_result); set_buffer_delay(bh);
} else if (ret > 0) { return 0;
bh_result->b_size = (ret << inode->i_blkbits);
if (buffer_unwritten(bh_result)) {
/* A delayed write to unwritten bh should
* be marked new and mapped. Mapped ensures
* that we don't do get_block multiple times
* when we write to the same offset and new
* ensures that we do proper zero out for
* partial write.
*/
set_buffer_new(bh_result);
set_buffer_mapped(bh_result);
}
ret = 0;
} }
return ret; map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
if (buffer_unwritten(bh)) {
/* A delayed write to unwritten bh should be marked
* new and mapped. Mapped ensures that we don't do
* get_block multiple times when we write to the same
* offset and new ensures that we do proper zero out
* for partial write.
*/
set_buffer_new(bh);
set_buffer_mapped(bh);
}
return 0;
} }
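Net effect of the rewritten ext4_da_get_block_prep() above: a write into a hole returns with the bh mapped to the invalid_block sentinel and flagged new + delayed -- space has been reserved, but no real block is allocated, and the sentinel block number is never used for IO. Actual allocation is deferred to the writeback path (mpage_da_map_blocks()).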
/* /*
...@@ -2597,21 +2569,8 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, ...@@ -2597,21 +2569,8 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
static int noalloc_get_block_write(struct inode *inode, sector_t iblock, static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create) struct buffer_head *bh_result, int create)
{ {
int ret = 0;
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
return _ext4_get_block(inode, iblock, bh_result, 0);
/*
* we don't want to do block allocation in writepage
* so call get_block_wrap with create = 0
*/
ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);
ret = 0;
}
return ret;
} }
static int bget_one(handle_t *handle, struct buffer_head *bh) static int bget_one(handle_t *handle, struct buffer_head *bh)
...@@ -2821,13 +2780,131 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) ...@@ -2821,13 +2780,131 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 * number of contiguous blocks. So we will limit * number of contiguous blocks. So we will limit
 * number of contiguous blocks to a sane value * number of contiguous blocks to a sane value
*/ */
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) && if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) &&
(max_blocks > EXT4_MAX_TRANS_DATA)) (max_blocks > EXT4_MAX_TRANS_DATA))
max_blocks = EXT4_MAX_TRANS_DATA; max_blocks = EXT4_MAX_TRANS_DATA;
return ext4_chunk_trans_blocks(inode, max_blocks); return ext4_chunk_trans_blocks(inode, max_blocks);
} }
/*
* write_cache_pages_da - walk the list of dirty pages of the given
* address space and call the callback function (which usually writes
* the pages).
*
* This is a forked version of write_cache_pages(). Differences:
* Range cyclic is ignored.
* no_nrwrite_index_update is always presumed true
*/
static int write_cache_pages_da(struct address_space *mapping,
struct writeback_control *wbc,
struct mpage_da_data *mpd)
{
int ret = 0;
int done = 0;
struct pagevec pvec;
int nr_pages;
pgoff_t index;
pgoff_t end; /* Inclusive */
long nr_to_write = wbc->nr_to_write;
pagevec_init(&pvec, 0);
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
while (!done && (index <= end)) {
int i;
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_DIRTY,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
/*
* At this point, the page may be truncated or
* invalidated (changing page->mapping to NULL), or
* even swizzled back from swapper_space to tmpfs file
* mapping. However, page->index will not change
* because we have a reference on the page.
*/
if (page->index > end) {
done = 1;
break;
}
lock_page(page);
/*
* Page truncated or invalidated. We can freely skip it
* then, even for data integrity operations: the page
* has disappeared concurrently, so there could be no
 * real expectation of this data integrity operation
* even if there is now a new, dirty page at the same
* pagecache address.
*/
if (unlikely(page->mapping != mapping)) {
continue_unlock:
unlock_page(page);
continue;
}
if (!PageDirty(page)) {
/* someone wrote it for us */
goto continue_unlock;
}
if (PageWriteback(page)) {
if (wbc->sync_mode != WB_SYNC_NONE)
wait_on_page_writeback(page);
else
goto continue_unlock;
}
BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
ret = __mpage_da_writepage(page, wbc, mpd);
if (unlikely(ret)) {
if (ret == AOP_WRITEPAGE_ACTIVATE) {
unlock_page(page);
ret = 0;
} else {
done = 1;
break;
}
}
if (nr_to_write > 0) {
nr_to_write--;
if (nr_to_write == 0 &&
wbc->sync_mode == WB_SYNC_NONE) {
/*
* We stop writing back only if we are
* not doing integrity sync. In case of
* integrity sync we have to keep going
* because someone may be concurrently
* dirtying pages, and we might have
* synced a lot of newly appeared dirty
* pages, but have not synced all of the
* old dirty pages.
*/
done = 1;
break;
}
}
}
pagevec_release(&pvec);
cond_resched();
}
return ret;
}
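The practical consequence of the fork above: write_cache_pages_da() never wraps around (no range_cyclic) and never adjusts wbc->nr_to_write or mapping->writeback_index behind the caller's back, which is why the no_nrwrite_index_update save/restore disappears from ext4_da_writepages() below.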
static int ext4_da_writepages(struct address_space *mapping, static int ext4_da_writepages(struct address_space *mapping,
struct writeback_control *wbc) struct writeback_control *wbc)
{ {
...@@ -2836,7 +2913,6 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2836,7 +2913,6 @@ static int ext4_da_writepages(struct address_space *mapping,
handle_t *handle = NULL; handle_t *handle = NULL;
struct mpage_da_data mpd; struct mpage_da_data mpd;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
int no_nrwrite_index_update;
int pages_written = 0; int pages_written = 0;
long pages_skipped; long pages_skipped;
unsigned int max_pages; unsigned int max_pages;
...@@ -2916,12 +2992,6 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2916,12 +2992,6 @@ static int ext4_da_writepages(struct address_space *mapping,
mpd.wbc = wbc; mpd.wbc = wbc;
mpd.inode = mapping->host; mpd.inode = mapping->host;
/*
* we don't want write_cache_pages to update
* nr_to_write and writeback_index
*/
no_nrwrite_index_update = wbc->no_nrwrite_index_update;
wbc->no_nrwrite_index_update = 1;
pages_skipped = wbc->pages_skipped; pages_skipped = wbc->pages_skipped;
retry: retry:
...@@ -2941,7 +3011,7 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2941,7 +3011,7 @@ static int ext4_da_writepages(struct address_space *mapping,
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
ret = PTR_ERR(handle); ret = PTR_ERR(handle);
ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
"%ld pages, ino %lu; err %d\n", __func__, "%ld pages, ino %lu; err %d", __func__,
wbc->nr_to_write, inode->i_ino, ret); wbc->nr_to_write, inode->i_ino, ret);
goto out_writepages; goto out_writepages;
} }
...@@ -2963,8 +3033,7 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2963,8 +3033,7 @@ static int ext4_da_writepages(struct address_space *mapping,
mpd.io_done = 0; mpd.io_done = 0;
mpd.pages_written = 0; mpd.pages_written = 0;
mpd.retval = 0; mpd.retval = 0;
ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, ret = write_cache_pages_da(mapping, wbc, &mpd);
&mpd);
/* /*
* If we have a contiguous extent of pages and we * If we have a contiguous extent of pages and we
* haven't done the I/O yet, map the blocks and submit * haven't done the I/O yet, map the blocks and submit
...@@ -3016,7 +3085,7 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -3016,7 +3085,7 @@ static int ext4_da_writepages(struct address_space *mapping,
if (pages_skipped != wbc->pages_skipped) if (pages_skipped != wbc->pages_skipped)
ext4_msg(inode->i_sb, KERN_CRIT, ext4_msg(inode->i_sb, KERN_CRIT,
"This should not happen leaving %s " "This should not happen leaving %s "
"with nr_to_write = %ld ret = %d\n", "with nr_to_write = %ld ret = %d",
__func__, wbc->nr_to_write, ret); __func__, wbc->nr_to_write, ret);
/* Update index */ /* Update index */
...@@ -3030,8 +3099,6 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -3030,8 +3099,6 @@ static int ext4_da_writepages(struct address_space *mapping,
mapping->writeback_index = index; mapping->writeback_index = index;
out_writepages: out_writepages:
if (!no_nrwrite_index_update)
wbc->no_nrwrite_index_update = 0;
wbc->nr_to_write -= nr_to_writebump; wbc->nr_to_write -= nr_to_writebump;
wbc->range_start = range_start; wbc->range_start = range_start;
trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
...@@ -3076,7 +3143,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, ...@@ -3076,7 +3143,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags, loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata) struct page **pagep, void **fsdata)
{ {
int ret, retries = 0, quota_retries = 0; int ret, retries = 0;
struct page *page; struct page *page;
pgoff_t index; pgoff_t index;
unsigned from, to; unsigned from, to;
...@@ -3135,22 +3202,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, ...@@ -3135,22 +3202,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry; goto retry;
if ((ret == -EDQUOT) &&
EXT4_I(inode)->i_reserved_meta_blocks &&
(quota_retries++ < 3)) {
/*
* Since we often over-estimate the number of meta
* data blocks required, we may sometimes get a
 * spurious out of quota error even though there would
* be enough space once we write the data blocks and
* find out how many meta data blocks were _really_
* required. So try forcing the inode write to see if
* that helps.
*/
write_inode_now(inode, (quota_retries == 3));
goto retry;
}
out: out:
return ret; return ret;
} }
...@@ -3546,46 +3597,18 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, ...@@ -3546,46 +3597,18 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
return ret; return ret;
} }
/*
* ext4_get_block used when preparing for a DIO write or buffer write.
 * We allocate an uninitialized extent if blocks haven't been allocated.
* The extent will be converted to initialized after the IO is complete.
*/
static int ext4_get_block_write(struct inode *inode, sector_t iblock, static int ext4_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create) struct buffer_head *bh_result, int create)
{ {
handle_t *handle = ext4_journal_current_handle();
int ret = 0;
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
int dio_credits;
int started = 0;
ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
inode->i_ino, create); inode->i_ino, create);
/* return _ext4_get_block(inode, iblock, bh_result,
* ext4_get_block in prepare for a DIO write or buffer write. EXT4_GET_BLOCKS_IO_CREATE_EXT);
 * We allocate an uninitialized extent if blocks haven't been allocated.
 * The extent will be converted to initialized after the IO completes.
*/
create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
if (!handle) {
if (max_blocks > DIO_MAX_BLOCKS)
max_blocks = DIO_MAX_BLOCKS;
dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
handle = ext4_journal_start(inode, dio_credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
}
started = 1;
}
ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
create);
if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);
ret = 0;
}
if (started)
ext4_journal_stop(handle);
out:
return ret;
} }
static void dump_completed_IO(struct inode * inode) static void dump_completed_IO(struct inode * inode)
...@@ -3973,7 +3996,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, ...@@ -3973,7 +3996,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
...@@ -4302,10 +4325,9 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, ...@@ -4302,10 +4325,9 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
count)) { count)) {
ext4_error(inode->i_sb, "inode #%lu: " EXT4_ERROR_INODE(inode, "attempt to clear invalid "
"attempt to clear blocks %llu len %lu, invalid", "blocks %llu len %lu",
inode->i_ino, (unsigned long long) block_to_free, (unsigned long long) block_to_free, count);
count);
return 1; return 1;
} }
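This and the following hunks convert ext4_error() call sites to EXT4_ERROR_INODE(), which folds the inode identity into the report so callers stop passing "inode=%lu" by hand. The macro itself is defined elsewhere in the series; a plausible sketch, assuming it wraps an inode-aware error helper:

	/*
	 * Hypothetical sketch -- the real definition lives in
	 * fs/ext4/ext4.h and is not part of these hunks.
	 */
	#define EXT4_ERROR_INODE(inode, fmt, a...) \
		ext4_error_inode(__func__, (inode), (fmt), ## a)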
...@@ -4410,11 +4432,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, ...@@ -4410,11 +4432,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
ext4_handle_dirty_metadata(handle, inode, this_bh); ext4_handle_dirty_metadata(handle, inode, this_bh);
else else
ext4_error(inode->i_sb, EXT4_ERROR_INODE(inode,
"circular indirect block detected, " "circular indirect block detected at "
"inode=%lu, block=%llu", "block %llu",
inode->i_ino, (unsigned long long) this_bh->b_blocknr);
(unsigned long long) this_bh->b_blocknr);
} }
} }
...@@ -4452,11 +4473,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, ...@@ -4452,11 +4473,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
nr, 1)) { nr, 1)) {
ext4_error(inode->i_sb, EXT4_ERROR_INODE(inode,
"indirect mapped block in inode " "invalid indirect mapped "
"#%lu invalid (level %d, blk #%lu)", "block %lu (level %d)",
inode->i_ino, depth, (unsigned long) nr, depth);
(unsigned long) nr);
break; break;
} }
...@@ -4468,9 +4488,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, ...@@ -4468,9 +4488,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
* (should be rare). * (should be rare).
*/ */
if (!bh) { if (!bh) {
ext4_error(inode->i_sb, EXT4_ERROR_INODE(inode,
"Read failure, inode=%lu, block=%llu", "Read failure block=%llu",
inode->i_ino, nr); (unsigned long long) nr);
continue; continue;
} }
...@@ -4612,12 +4632,12 @@ void ext4_truncate(struct inode *inode) ...@@ -4612,12 +4632,12 @@ void ext4_truncate(struct inode *inode)
if (!ext4_can_truncate(inode)) if (!ext4_can_truncate(inode))
return; return;
EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
ext4_ext_truncate(inode); ext4_ext_truncate(inode);
return; return;
} }
...@@ -4785,8 +4805,8 @@ static int __ext4_get_inode_loc(struct inode *inode, ...@@ -4785,8 +4805,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
bh = sb_getblk(sb, block); bh = sb_getblk(sb, block);
if (!bh) { if (!bh) {
ext4_error(sb, "unable to read inode block - " EXT4_ERROR_INODE(inode, "unable to read inode block - "
"inode=%lu, block=%llu", inode->i_ino, block); "block %llu", block);
return -EIO; return -EIO;
} }
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
...@@ -4884,8 +4904,8 @@ static int __ext4_get_inode_loc(struct inode *inode, ...@@ -4884,8 +4904,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
submit_bh(READ_META, bh); submit_bh(READ_META, bh);
wait_on_buffer(bh); wait_on_buffer(bh);
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
ext4_error(sb, "unable to read inode block - inode=%lu," EXT4_ERROR_INODE(inode, "unable to read inode "
" block=%llu", inode->i_ino, block); "block %llu", block);
brelse(bh); brelse(bh);
return -EIO; return -EIO;
} }
...@@ -5096,8 +5116,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ...@@ -5096,8 +5116,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ret = 0; ret = 0;
if (ei->i_file_acl && if (ei->i_file_acl &&
!ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
ext4_error(sb, "bad extended attribute block %llu inode #%lu", EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
ei->i_file_acl, inode->i_ino); ei->i_file_acl);
ret = -EIO; ret = -EIO;
goto bad_inode; goto bad_inode;
} else if (ei->i_flags & EXT4_EXTENTS_FL) { } else if (ei->i_flags & EXT4_EXTENTS_FL) {
...@@ -5142,8 +5162,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ...@@ -5142,8 +5162,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
} else { } else {
ret = -EIO; ret = -EIO;
ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
inode->i_mode, inode->i_ino);
goto bad_inode; goto bad_inode;
} }
brelse(iloc.bh); brelse(iloc.bh);
...@@ -5381,9 +5400,9 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) ...@@ -5381,9 +5400,9 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
if (wbc->sync_mode == WB_SYNC_ALL) if (wbc->sync_mode == WB_SYNC_ALL)
sync_dirty_buffer(iloc.bh); sync_dirty_buffer(iloc.bh);
if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
ext4_error(inode->i_sb, "IO error syncing inode, " EXT4_ERROR_INODE(inode,
"inode=%lu, block=%llu", inode->i_ino, "IO error syncing inode (block=%llu)",
(unsigned long long)iloc.bh->b_blocknr); (unsigned long long) iloc.bh->b_blocknr);
err = -EIO; err = -EIO;
} }
brelse(iloc.bh); brelse(iloc.bh);
...@@ -5455,7 +5474,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ...@@ -5455,7 +5474,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
} }
if (attr->ia_valid & ATTR_SIZE) { if (attr->ia_valid & ATTR_SIZE) {
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
if (attr->ia_size > sbi->s_bitmap_maxbytes) { if (attr->ia_size > sbi->s_bitmap_maxbytes) {
...@@ -5468,7 +5487,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ...@@ -5468,7 +5487,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (S_ISREG(inode->i_mode) && if (S_ISREG(inode->i_mode) &&
attr->ia_valid & ATTR_SIZE && attr->ia_valid & ATTR_SIZE &&
(attr->ia_size < inode->i_size || (attr->ia_size < inode->i_size ||
(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) {
handle_t *handle; handle_t *handle;
handle = ext4_journal_start(inode, 3); handle = ext4_journal_start(inode, 3);
...@@ -5500,7 +5519,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ...@@ -5500,7 +5519,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
} }
} }
/* ext4_truncate will clear the flag */ /* ext4_truncate will clear the flag */
if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))
ext4_truncate(inode); ext4_truncate(inode);
} }
...@@ -5576,7 +5595,7 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, ...@@ -5576,7 +5595,7 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{ {
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return ext4_indirect_trans_blocks(inode, nrblocks, chunk); return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
} }
...@@ -5911,9 +5930,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ...@@ -5911,9 +5930,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
*/ */
if (val) if (val)
EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL; ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
else else
EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL; ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
ext4_set_aops(inode); ext4_set_aops(inode);
jbd2_journal_unlock_updates(journal); jbd2_journal_unlock_updates(journal);
......
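A conversion that recurs throughout the inode.c hunks above (and again in mballoc.c, migrate.c, move_extent.c and namei.c below) replaces open-coded tests and updates of EXT4_I(inode)->i_flags with ext4_test_inode_flag() and friends. The helpers are not defined in this excerpt; a minimal sketch, assuming they map the EXT4_INODE_* bit numbers onto the kernel's atomic bitops:

	/*
	 * Hypothetical sketch -- real definitions live in fs/ext4/ext4.h.
	 * Atomic set_bit()/clear_bit()/test_bit() avoid the races that the
	 * open-coded  i_flags |= ...  read-modify-write sequences allowed.
	 */
	static inline int ext4_test_inode_flag(struct inode *inode, int bit)
	{
		return test_bit(bit, &EXT4_I(inode)->i_flags);
	}

	static inline void ext4_set_inode_flag(struct inode *inode, int bit)
	{
		set_bit(bit, &EXT4_I(inode)->i_flags);
	}

	static inline void ext4_clear_inode_flag(struct inode *inode, int bit)
	{
		clear_bit(bit, &EXT4_I(inode)->i_flags);
	}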
...@@ -258,7 +258,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ...@@ -258,7 +258,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (me.moved_len > 0) if (me.moved_len > 0)
file_remove_suid(donor_filp); file_remove_suid(donor_filp);
if (copy_to_user((struct move_extent __user *)arg, if (copy_to_user((struct move_extent __user *)arg,
&me, sizeof(me))) &me, sizeof(me)))
err = -EFAULT; err = -EFAULT;
mext_out: mext_out:
...@@ -373,7 +373,30 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ...@@ -373,7 +373,30 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC32_SETRSVSZ: case EXT4_IOC32_SETRSVSZ:
cmd = EXT4_IOC_SETRSVSZ; cmd = EXT4_IOC_SETRSVSZ;
break; break;
case EXT4_IOC_GROUP_ADD: case EXT4_IOC32_GROUP_ADD: {
struct compat_ext4_new_group_input __user *uinput;
struct ext4_new_group_input input;
mm_segment_t old_fs;
int err;
uinput = compat_ptr(arg);
err = get_user(input.group, &uinput->group);
err |= get_user(input.block_bitmap, &uinput->block_bitmap);
err |= get_user(input.inode_bitmap, &uinput->inode_bitmap);
err |= get_user(input.inode_table, &uinput->inode_table);
err |= get_user(input.blocks_count, &uinput->blocks_count);
err |= get_user(input.reserved_blocks,
&uinput->reserved_blocks);
if (err)
return -EFAULT;
old_fs = get_fs();
set_fs(KERNEL_DS);
err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD,
(unsigned long) &input);
set_fs(old_fs);
return err;
}
case EXT4_IOC_MOVE_EXT:
break; break;
default: default:
return -ENOIOCTLCMD; return -ENOIOCTLCMD;
......
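The EXT4_IOC32_GROUP_ADD translation above follows the standard compat-ioctl idiom: each field is pulled out of the 32-bit user struct with get_user() (its layout and padding differ from the native struct, so a flat copy_from_user() would be wrong), and the set_fs(KERNEL_DS)/set_fs(old_fs) pair temporarily widens the address limit so the native ext4_ioctl() can treat the on-stack ext4_new_group_input as if it came from userspace.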
...@@ -658,6 +658,27 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, ...@@ -658,6 +658,27 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
} }
} }
/*
* Cache the order of the largest free extent we have available in this block
* group.
*/
static void
mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
{
int i;
int bits;
grp->bb_largest_free_order = -1; /* uninit */
bits = sb->s_blocksize_bits + 1;
for (i = bits; i >= 0; i--) {
if (grp->bb_counters[i] > 0) {
grp->bb_largest_free_order = i;
break;
}
}
}
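A concrete example of the cache built above: on a 4KiB-block filesystem (s_blocksize_bits == 12) the scan runs from order 13 down to 0, so a group whose largest free chunk is 256 blocks ends up with bb_largest_free_order == 8. Later in this diff, ext4_mb_good_group() compares that cached order against ac_2order to reject groups for cr == 0 (power-of-two) requests without loading their buddy pages at all.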
static noinline_for_stack static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb, void ext4_mb_generate_buddy(struct super_block *sb,
void *buddy, void *bitmap, ext4_group_t group) void *buddy, void *bitmap, ext4_group_t group)
...@@ -700,6 +721,7 @@ void ext4_mb_generate_buddy(struct super_block *sb, ...@@ -700,6 +721,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
*/ */
grp->bb_free = free; grp->bb_free = free;
} }
mb_set_largest_free_order(sb, grp);
clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
...@@ -725,6 +747,9 @@ void ext4_mb_generate_buddy(struct super_block *sb, ...@@ -725,6 +747,9 @@ void ext4_mb_generate_buddy(struct super_block *sb,
* contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks. * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks.
* So it can have information regarding groups_per_page which * So it can have information regarding groups_per_page which
* is blocks_per_page/2 * is blocks_per_page/2
*
* Locking note: This routine takes the block group lock of all groups
* for this page; do not hold this lock when calling this routine!
*/ */
static int ext4_mb_init_cache(struct page *page, char *incore) static int ext4_mb_init_cache(struct page *page, char *incore)
...@@ -865,6 +890,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) ...@@ -865,6 +890,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
BUG_ON(incore == NULL); BUG_ON(incore == NULL);
mb_debug(1, "put buddy for group %u in page %lu/%x\n", mb_debug(1, "put buddy for group %u in page %lu/%x\n",
group, page->index, i * blocksize); group, page->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
grinfo = ext4_get_group_info(sb, group); grinfo = ext4_get_group_info(sb, group);
grinfo->bb_fragments = 0; grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0, memset(grinfo->bb_counters, 0,
...@@ -882,6 +908,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) ...@@ -882,6 +908,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
BUG_ON(incore != NULL); BUG_ON(incore != NULL);
mb_debug(1, "put bitmap for group %u in page %lu/%x\n", mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
group, page->index, i * blocksize); group, page->index, i * blocksize);
trace_ext4_mb_bitmap_load(sb, group);
/* see comments in ext4_mb_put_pa() */ /* see comments in ext4_mb_put_pa() */
ext4_lock_group(sb, group); ext4_lock_group(sb, group);
...@@ -910,6 +937,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore) ...@@ -910,6 +937,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
return err; return err;
} }
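The two tracepoints added above (trace_ext4_mb_bitmap_load and trace_ext4_mb_buddy_bitmap_load) make the buddy-cache behaviour observable at runtime; assuming the standard tracefs layout, they would be enabled by writing 1 to /sys/kernel/debug/tracing/events/ext4/ext4_mb_buddy_bitmap_load/enable (and the bitmap_load sibling) and reading trace_pipe.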
/*
* Locking note: This routine calls ext4_mb_init_cache(), which takes the
* block group lock of all groups for this page; do not hold the BG lock when
* calling this routine!
*/
static noinline_for_stack static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
{ {
...@@ -1004,6 +1036,11 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) ...@@ -1004,6 +1036,11 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
return ret; return ret;
} }
/*
* Locking note: This routine calls ext4_mb_init_cache(), which takes the
* block group lock of all groups for this page; do not hold the BG lock when
* calling this routine!
*/
static noinline_for_stack int static noinline_for_stack int
ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
struct ext4_buddy *e4b) struct ext4_buddy *e4b)
...@@ -1150,7 +1187,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ...@@ -1150,7 +1187,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
return ret; return ret;
} }
static void ext4_mb_release_desc(struct ext4_buddy *e4b) static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{ {
if (e4b->bd_bitmap_page) if (e4b->bd_bitmap_page)
page_cache_release(e4b->bd_bitmap_page); page_cache_release(e4b->bd_bitmap_page);
...@@ -1299,6 +1336,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, ...@@ -1299,6 +1336,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
buddy = buddy2; buddy = buddy2;
} while (1); } while (1);
} }
mb_set_largest_free_order(sb, e4b->bd_info);
mb_check_buddy(e4b); mb_check_buddy(e4b);
} }
...@@ -1427,6 +1465,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) ...@@ -1427,6 +1465,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
e4b->bd_info->bb_counters[ord]++; e4b->bd_info->bb_counters[ord]++;
e4b->bd_info->bb_counters[ord]++; e4b->bd_info->bb_counters[ord]++;
} }
mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
mb_check_buddy(e4b); mb_check_buddy(e4b);
...@@ -1617,7 +1656,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, ...@@ -1617,7 +1656,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
} }
ext4_unlock_group(ac->ac_sb, group); ext4_unlock_group(ac->ac_sb, group);
ext4_mb_release_desc(e4b); ext4_mb_unload_buddy(e4b);
return 0; return 0;
} }
...@@ -1672,7 +1711,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, ...@@ -1672,7 +1711,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
ext4_mb_use_best_found(ac, e4b); ext4_mb_use_best_found(ac, e4b);
} }
ext4_unlock_group(ac->ac_sb, group); ext4_unlock_group(ac->ac_sb, group);
ext4_mb_release_desc(e4b); ext4_mb_unload_buddy(e4b);
return 0; return 0;
} }
...@@ -1821,16 +1860,22 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, ...@@ -1821,16 +1860,22 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
} }
} }
/* This is now called BEFORE we load the buddy bitmap. */
static int ext4_mb_good_group(struct ext4_allocation_context *ac, static int ext4_mb_good_group(struct ext4_allocation_context *ac,
ext4_group_t group, int cr) ext4_group_t group, int cr)
{ {
unsigned free, fragments; unsigned free, fragments;
unsigned i, bits;
int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
BUG_ON(cr < 0 || cr >= 4); BUG_ON(cr < 0 || cr >= 4);
BUG_ON(EXT4_MB_GRP_NEED_INIT(grp));
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
int ret = ext4_mb_init_group(ac->ac_sb, group);
if (ret)
return 0;
}
free = grp->bb_free; free = grp->bb_free;
fragments = grp->bb_fragments; fragments = grp->bb_fragments;
...@@ -1843,17 +1888,16 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, ...@@ -1843,17 +1888,16 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
case 0: case 0:
BUG_ON(ac->ac_2order == 0); BUG_ON(ac->ac_2order == 0);
if (grp->bb_largest_free_order < ac->ac_2order)
return 0;
/* Avoid using the first bg of a flexgroup for data files */ /* Avoid using the first bg of a flexgroup for data files */
if ((ac->ac_flags & EXT4_MB_HINT_DATA) && if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
(flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
((group % flex_size) == 0)) ((group % flex_size) == 0))
return 0; return 0;
bits = ac->ac_sb->s_blocksize_bits + 1; return 1;
for (i = ac->ac_2order; i <= bits; i++)
if (grp->bb_counters[i] > 0)
return 1;
break;
case 1: case 1:
if ((free / fragments) >= ac->ac_g_ex.fe_len) if ((free / fragments) >= ac->ac_g_ex.fe_len)
return 1; return 1;
...@@ -1964,7 +2008,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) ...@@ -1964,7 +2008,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
sbi = EXT4_SB(sb); sbi = EXT4_SB(sb);
ngroups = ext4_get_groups_count(sb); ngroups = ext4_get_groups_count(sb);
/* non-extent files are limited to low blocks/groups */ /* non-extent files are limited to low blocks/groups */
if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL)) if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
ngroups = sbi->s_blockfile_groups; ngroups = sbi->s_blockfile_groups;
BUG_ON(ac->ac_status == AC_STATUS_FOUND); BUG_ON(ac->ac_status == AC_STATUS_FOUND);
...@@ -2024,15 +2068,11 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) ...@@ -2024,15 +2068,11 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
group = ac->ac_g_ex.fe_group; group = ac->ac_g_ex.fe_group;
for (i = 0; i < ngroups; group++, i++) { for (i = 0; i < ngroups; group++, i++) {
struct ext4_group_info *grp;
struct ext4_group_desc *desc;
if (group == ngroups) if (group == ngroups)
group = 0; group = 0;
/* quick check to skip empty groups */ /* This now checks without needing the buddy page */
grp = ext4_get_group_info(sb, group); if (!ext4_mb_good_group(ac, group, cr))
if (grp->bb_free == 0)
continue; continue;
err = ext4_mb_load_buddy(sb, group, &e4b); err = ext4_mb_load_buddy(sb, group, &e4b);
...@@ -2040,15 +2080,18 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) ...@@ -2040,15 +2080,18 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
goto out; goto out;
ext4_lock_group(sb, group); ext4_lock_group(sb, group);
/*
* We need to check again after locking the
* block group
*/
if (!ext4_mb_good_group(ac, group, cr)) { if (!ext4_mb_good_group(ac, group, cr)) {
/* someone did allocation from this group */
ext4_unlock_group(sb, group); ext4_unlock_group(sb, group);
ext4_mb_release_desc(&e4b); ext4_mb_unload_buddy(&e4b);
continue; continue;
} }
ac->ac_groups_scanned++; ac->ac_groups_scanned++;
desc = ext4_get_group_desc(sb, group, NULL);
if (cr == 0) if (cr == 0)
ext4_mb_simple_scan_group(ac, &e4b); ext4_mb_simple_scan_group(ac, &e4b);
else if (cr == 1 && else if (cr == 1 &&
...@@ -2058,7 +2101,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) ...@@ -2058,7 +2101,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
ext4_mb_complex_scan_group(ac, &e4b); ext4_mb_complex_scan_group(ac, &e4b);
ext4_unlock_group(sb, group); ext4_unlock_group(sb, group);
ext4_mb_release_desc(&e4b); ext4_mb_unload_buddy(&e4b);
if (ac->ac_status != AC_STATUS_CONTINUE) if (ac->ac_status != AC_STATUS_CONTINUE)
break; break;
...@@ -2148,7 +2191,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) ...@@ -2148,7 +2191,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
ext4_lock_group(sb, group); ext4_lock_group(sb, group);
memcpy(&sg, ext4_get_group_info(sb, group), i); memcpy(&sg, ext4_get_group_info(sb, group), i);
ext4_unlock_group(sb, group); ext4_unlock_group(sb, group);
ext4_mb_release_desc(&e4b); ext4_mb_unload_buddy(&e4b);
seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
sg.info.bb_fragments, sg.info.bb_first_free); sg.info.bb_fragments, sg.info.bb_first_free);
...@@ -2255,6 +2298,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, ...@@ -2255,6 +2298,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
init_rwsem(&meta_group_info[i]->alloc_sem); init_rwsem(&meta_group_info[i]->alloc_sem);
meta_group_info[i]->bb_free_root = RB_ROOT; meta_group_info[i]->bb_free_root = RB_ROOT;
meta_group_info[i]->bb_largest_free_order = -1; /* uninit */
#ifdef DOUBLE_CHECK #ifdef DOUBLE_CHECK
{ {
...@@ -2536,6 +2580,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) ...@@ -2536,6 +2580,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
entry->count, entry->group, entry); entry->count, entry->group, entry);
if (test_opt(sb, DISCARD)) { if (test_opt(sb, DISCARD)) {
int ret;
ext4_fsblk_t discard_block; ext4_fsblk_t discard_block;
discard_block = entry->start_blk + discard_block = entry->start_blk +
...@@ -2543,7 +2588,12 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) ...@@ -2543,7 +2588,12 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
trace_ext4_discard_blocks(sb, trace_ext4_discard_blocks(sb,
(unsigned long long)discard_block, (unsigned long long)discard_block,
entry->count); entry->count);
sb_issue_discard(sb, discard_block, entry->count); ret = sb_issue_discard(sb, discard_block, entry->count);
if (ret == EOPNOTSUPP) {
ext4_warning(sb,
"discard not supported, disabling");
clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
}
} }
err = ext4_mb_load_buddy(sb, entry->group, &e4b); err = ext4_mb_load_buddy(sb, entry->group, &e4b);
...@@ -2568,7 +2618,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) ...@@ -2568,7 +2618,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
} }
ext4_unlock_group(sb, entry->group); ext4_unlock_group(sb, entry->group);
kmem_cache_free(ext4_free_ext_cachep, entry); kmem_cache_free(ext4_free_ext_cachep, entry);
ext4_mb_release_desc(&e4b); ext4_mb_unload_buddy(&e4b);
} }
mb_debug(1, "freed %u blocks in %u structures\n", count, count2); mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
...@@ -2641,7 +2691,7 @@ int __init init_ext4_mballoc(void) ...@@ -2641,7 +2691,7 @@ int __init init_ext4_mballoc(void)
void exit_ext4_mballoc(void) void exit_ext4_mballoc(void)
{ {
/* /*
* Wait for completion of call_rcu()'s on ext4_pspace_cachep * Wait for completion of call_rcu()'s on ext4_pspace_cachep
* before destroying the slab cache. * before destroying the slab cache.
*/ */
...@@ -2981,7 +3031,7 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) ...@@ -2981,7 +3031,7 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
atomic_inc(&sbi->s_bal_reqs); atomic_inc(&sbi->s_bal_reqs);
atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len) if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
atomic_inc(&sbi->s_bal_success); atomic_inc(&sbi->s_bal_success);
atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
...@@ -3123,7 +3173,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) ...@@ -3123,7 +3173,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
continue; continue;
/* non-extent files can't have physical blocks past 2^32 */ /* non-extent files can't have physical blocks past 2^32 */
if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) && if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
continue; continue;
...@@ -3280,7 +3330,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, ...@@ -3280,7 +3330,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
spin_unlock(&pa->pa_lock); spin_unlock(&pa->pa_lock);
grp_blk = pa->pa_pstart; grp_blk = pa->pa_pstart;
/* /*
* If doing group-based preallocation, pa_pstart may be in the * If doing group-based preallocation, pa_pstart may be in the
* next group when pa is used up * next group when pa is used up
*/ */
...@@ -3697,7 +3747,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, ...@@ -3697,7 +3747,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
ext4_unlock_group(sb, group); ext4_unlock_group(sb, group);
if (ac) if (ac)
kmem_cache_free(ext4_ac_cachep, ac); kmem_cache_free(ext4_ac_cachep, ac);
ext4_mb_release_desc(&e4b); ext4_mb_unload_buddy(&e4b);
put_bh(bitmap_bh); put_bh(bitmap_bh);
return free; return free;
} }
...@@ -3801,7 +3851,7 @@ void ext4_discard_preallocations(struct inode *inode) ...@@ -3801,7 +3851,7 @@ void ext4_discard_preallocations(struct inode *inode)
if (bitmap_bh == NULL) { if (bitmap_bh == NULL) {
ext4_error(sb, "Error reading block bitmap for %u", ext4_error(sb, "Error reading block bitmap for %u",
group); group);
ext4_mb_release_desc(&e4b); ext4_mb_unload_buddy(&e4b);
continue; continue;
} }
...@@ -3810,7 +3860,7 @@ void ext4_discard_preallocations(struct inode *inode) ...@@ -3810,7 +3860,7 @@ void ext4_discard_preallocations(struct inode *inode)
ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
ext4_unlock_group(sb, group); ext4_unlock_group(sb, group);
ext4_mb_release_desc(&e4b); ext4_mb_unload_buddy(&e4b);
put_bh(bitmap_bh); put_bh(bitmap_bh);
list_del(&pa->u.pa_tmp_list); list_del(&pa->u.pa_tmp_list);
...@@ -4074,7 +4124,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, ...@@ -4074,7 +4124,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
ext4_mb_release_group_pa(&e4b, pa, ac); ext4_mb_release_group_pa(&e4b, pa, ac);
ext4_unlock_group(sb, group); ext4_unlock_group(sb, group);
ext4_mb_release_desc(&e4b); ext4_mb_unload_buddy(&e4b);
list_del(&pa->u.pa_tmp_list); list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
} }
...@@ -4484,12 +4534,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, ...@@ -4484,12 +4534,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
if (!bh) if (!bh)
tbh = sb_find_get_block(inode->i_sb, tbh = sb_find_get_block(inode->i_sb,
block + i); block + i);
ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
inode, tbh, block + i); inode, tbh, block + i);
} }
} }
/* /*
* We need to make sure we don't reuse the freed block until * We need to make sure we don't reuse the freed block until
* after the transaction is committed, which we can do by * after the transaction is committed, which we can do by
* treating the block as metadata, below. We make an * treating the block as metadata, below. We make an
...@@ -4610,7 +4660,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, ...@@ -4610,7 +4660,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
} }
ext4_mb_release_desc(&e4b); ext4_mb_unload_buddy(&e4b);
freed += count; freed += count;
......
...@@ -475,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode) ...@@ -475,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode)
*/ */
if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_INCOMPAT_EXTENTS) || EXT4_FEATURE_INCOMPAT_EXTENTS) ||
(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return -EINVAL; return -EINVAL;
if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
......
...@@ -482,6 +482,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, ...@@ -482,6 +482,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
int depth = ext_depth(orig_inode); int depth = ext_depth(orig_inode);
int ret; int ret;
start_ext.ee_block = end_ext.ee_block = 0;
o_start = o_end = oext = orig_path[depth].p_ext; o_start = o_end = oext = orig_path[depth].p_ext;
oext_alen = ext4_ext_get_actual_len(oext); oext_alen = ext4_ext_get_actual_len(oext);
start_ext.ee_len = end_ext.ee_len = 0; start_ext.ee_len = end_ext.ee_len = 0;
...@@ -529,7 +530,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, ...@@ -529,7 +530,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
* new_ext |-------| * new_ext |-------|
*/ */
if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
ext4_error(orig_inode->i_sb, EXT4_ERROR_INODE(orig_inode,
"new_ext_end(%u) should be less than or equal to " "new_ext_end(%u) should be less than or equal to "
"oext->ee_block(%u) + oext_alen(%d) - 1", "oext->ee_block(%u) + oext_alen(%d) - 1",
new_ext_end, le32_to_cpu(oext->ee_block), new_ext_end, le32_to_cpu(oext->ee_block),
...@@ -692,12 +693,12 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, ...@@ -692,12 +693,12 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
while (1) { while (1) {
/* The extent for donor must be found. */ /* The extent for donor must be found. */
if (!dext) { if (!dext) {
ext4_error(donor_inode->i_sb, EXT4_ERROR_INODE(donor_inode,
"The extent for donor must be found"); "The extent for donor must be found");
*err = -EIO; *err = -EIO;
goto out; goto out;
} else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
ext4_error(donor_inode->i_sb, EXT4_ERROR_INODE(donor_inode,
"Donor offset(%u) and the first block of donor " "Donor offset(%u) and the first block of donor "
"extent(%u) should be equal", "extent(%u) should be equal",
donor_off, donor_off,
...@@ -976,11 +977,11 @@ mext_check_arguments(struct inode *orig_inode, ...@@ -976,11 +977,11 @@ mext_check_arguments(struct inode *orig_inode,
} }
/* Ext4 move extent supports only extent based file */ /* Ext4 move extent supports only extent based file */
if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) { if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
ext4_debug("ext4 move extent: orig file is not extents " ext4_debug("ext4 move extent: orig file is not extents "
"based file [ino:orig %lu]\n", orig_inode->i_ino); "based file [ino:orig %lu]\n", orig_inode->i_ino);
return -EOPNOTSUPP; return -EOPNOTSUPP;
} else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) { } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
ext4_debug("ext4 move extent: donor file is not extents " ext4_debug("ext4 move extent: donor file is not extents "
"based file [ino:donor %lu]\n", donor_inode->i_ino); "based file [ino:donor %lu]\n", donor_inode->i_ino);
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -1354,7 +1355,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, ...@@ -1354,7 +1355,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
if (ret1 < 0) if (ret1 < 0)
break; break;
if (*moved_len > len) { if (*moved_len > len) {
ext4_error(orig_inode->i_sb, EXT4_ERROR_INODE(orig_inode,
"We replaced blocks too much! " "We replaced blocks too much! "
"sum of replaced: %llu requested: %llu", "sum of replaced: %llu requested: %llu",
*moved_len, len); *moved_len, len);
......
...@@ -187,7 +187,7 @@ unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) ...@@ -187,7 +187,7 @@ unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
return blocksize; return blocksize;
return (len & 65532) | ((len & 3) << 16); return (len & 65532) | ((len & 3) << 16);
} }
__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
{ {
if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
...@@ -197,7 +197,7 @@ __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) ...@@ -197,7 +197,7 @@ __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
if (len == blocksize) { if (len == blocksize) {
if (blocksize == 65536) if (blocksize == 65536)
return cpu_to_le16(EXT4_MAX_REC_LEN); return cpu_to_le16(EXT4_MAX_REC_LEN);
else else
return cpu_to_le16(0); return cpu_to_le16(0);
} }
return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
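As a worked example of the rec_len encoding in the two functions above: values up to 65532 are stored literally; a whole-block record on a 65536-byte-block filesystem is stored as EXT4_MAX_REC_LEN; and any other multiple-of-4 length folds its high bits into the low two bits. E.g. len = 68000 encodes as (68000 & 65532) | ((68000 >> 16) & 3) = 2464 | 1 = 2465, and decodes back as (2465 & 65532) | ((2465 & 3) << 16) = 2464 + 65536 = 68000.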
@@ -349,7 +349,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
 		brelse(bh);
 	}
 	if (bcount)
 		printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
 		       levels ? "" : "   ", names, space/bcount,
 		       (space/bcount)*100/blocksize);
 	return (struct stats) { names, space, bcount};
@@ -653,10 +653,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 	int ret, err;
 	__u32 hashval;

 	dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
 		       start_hash, start_minor_hash));
 	dir = dir_file->f_path.dentry->d_inode;
-	if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
+	if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
 		hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
 		if (hinfo.hash_version <= DX_HASH_TEA)
 			hinfo.hash_version +=
@@ -801,7 +801,7 @@ static void ext4_update_dx_flag(struct inode *inode)
 {
 	if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
 				     EXT4_FEATURE_COMPAT_DIR_INDEX))
-		EXT4_I(inode)->i_flags &= ~EXT4_INDEX_FL;
+		ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
 }

 /*
@@ -943,8 +943,8 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 			wait_on_buffer(bh);
 			if (!buffer_uptodate(bh)) {
 				/* read error, skip block & hope for the best */
-				ext4_error(sb, "reading directory #%lu offset %lu",
-					   dir->i_ino, (unsigned long)block);
+				EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
+						 (unsigned long) block);
 				brelse(bh);
 				goto next;
 			}
@@ -1066,15 +1066,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
 		__u32 ino = le32_to_cpu(de->inode);
 		brelse(bh);
 		if (!ext4_valid_inum(dir->i_sb, ino)) {
-			ext4_error(dir->i_sb, "bad inode number: %u", ino);
+			EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
 			return ERR_PTR(-EIO);
 		}
 		inode = ext4_iget(dir->i_sb, ino);
 		if (unlikely(IS_ERR(inode))) {
 			if (PTR_ERR(inode) == -ESTALE) {
-				ext4_error(dir->i_sb,
+				EXT4_ERROR_INODE(dir,
 						 "deleted inode referenced: %u",
 						 ino);
 				return ERR_PTR(-EIO);
 			} else {
 				return ERR_CAST(inode);
@@ -1104,8 +1104,8 @@ struct dentry *ext4_get_parent(struct dentry *child)
 	brelse(bh);
 	if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
-		ext4_error(child->d_inode->i_sb,
-			   "bad inode number: %u", ino);
+		EXT4_ERROR_INODE(child->d_inode,
+				 "bad parent inode number: %u", ino);
 		return ERR_PTR(-EIO);
 	}
@@ -1141,7 +1141,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
 	unsigned rec_len = 0;

 	while (count--) {
 		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
 						(from + (map->offs<<2));
 		rec_len = EXT4_DIR_REC_LEN(de->name_len);
 		memcpy (to, de, rec_len);
@@ -1404,9 +1404,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 	de = (struct ext4_dir_entry_2 *)((char *)fde +
 		ext4_rec_len_from_disk(fde->rec_len, blocksize));
 	if ((char *) de >= (((char *) root) + blocksize)) {
-		ext4_error(dir->i_sb,
-			   "invalid rec_len for '..' in inode %lu",
-			   dir->i_ino);
+		EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
 		brelse(bh);
 		return -EIO;
 	}
@@ -1418,7 +1416,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 		brelse(bh);
 		return retval;
 	}
-	EXT4_I(dir)->i_flags |= EXT4_INDEX_FL;
+	ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
 	data1 = bh2->b_data;

 	memcpy (data1, de, len);
@@ -1491,7 +1489,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
 		retval = ext4_dx_add_entry(handle, dentry, inode);
 		if (!retval || (retval != ERR_BAD_DX_DIR))
 			return retval;
-		EXT4_I(dir)->i_flags &= ~EXT4_INDEX_FL;
+		ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
 		dx_fallback++;
 		ext4_mark_inode_dirty(handle, dir);
 	}
@@ -1519,6 +1517,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
 	de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
 	retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
 	brelse(bh);
+	if (retval == 0)
+		ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
 	return retval;
 }
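The EXT4_STATE_NEWENTRY bit set above marks an inode whose directory entry has just been added, so that a later fsync() knows the parent directory's data block must be written out too; this matters in no-journal mode, where nothing else forces that block to disk. A simplified sketch of a consumer walking up the tree and syncing each still-marked parent (the real helper also manages dentry and inode reference counts, omitted here for brevity):

static int ext4_sync_parent(struct inode *inode)
{
    int ret = 0;

    while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
        struct dentry *dentry;

        ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
        if (list_empty(&inode->i_dentry))
            break;
        dentry = list_entry(inode->i_dentry.next,
                            struct dentry, d_alias);
        if (!dentry->d_parent || !dentry->d_parent->d_inode)
            break;
        inode = dentry->d_parent->d_inode;
        /* flush the parent directory's mapped metadata buffers */
        ret = sync_mapping_buffers(inode->i_mapping);
        if (ret)
            break;
    }
    return ret;
}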
@@ -1915,9 +1915,8 @@ static int empty_dir(struct inode *inode)
 	if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
 	    !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
 		if (err)
-			ext4_error(inode->i_sb,
-				   "error %d reading directory #%lu offset 0",
-				   err, inode->i_ino);
+			EXT4_ERROR_INODE(inode,
+					 "error %d reading directory lblock 0", err);
 		else
 			ext4_warning(inode->i_sb,
 				     "bad directory (dir #%lu) - no data block",
@@ -1941,17 +1940,17 @@ static int empty_dir(struct inode *inode)
 	de = ext4_next_entry(de1, sb->s_blocksize);
 	while (offset < inode->i_size) {
 		if (!bh ||
 		    (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
+			unsigned int lblock;
 			err = 0;
 			brelse(bh);
-			bh = ext4_bread(NULL, inode,
-				offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
+			lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
+			bh = ext4_bread(NULL, inode, lblock, 0, &err);
 			if (!bh) {
 				if (err)
-					ext4_error(sb,
-						   "error %d reading directory"
-						   " #%lu offset %u",
-						   err, inode->i_ino, offset);
+					EXT4_ERROR_INODE(inode,
+						"error %d reading directory "
						"lblock %u", err, lblock);
 				offset += sb->s_blocksize;
 				continue;
 			}
@@ -2297,7 +2296,7 @@ static int ext4_symlink(struct inode *dir,
 		}
 	} else {
 		/* clear the extent format for fast symlink */
-		EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
+		ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
 		inode->i_op = &ext4_fast_symlink_inode_operations;
 		memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
 		inode->i_size = l-1;
...
@@ -911,7 +911,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	percpu_counter_add(&sbi->s_freeinodes_counter,
 			   EXT4_INODES_PER_GROUP(sb));

-	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
+	    sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group;
 		flex_group = ext4_flex_group(sbi, input->group);
 		atomic_add(input->free_blocks_count,
...
@@ -241,6 +241,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 	if (sb->s_flags & MS_RDONLY)
 		return ERR_PTR(-EROFS);

+	vfs_check_frozen(sb, SB_FREEZE_WRITE);
 	/* Special case here: if the journal has aborted behind our
 	 * backs (eg. EIO in the commit thread), then we still need to
 	 * take the FS itself readonly cleanly. */
@@ -941,6 +942,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
 	if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
 		seq_puts(seq, ",journal_async_commit");
+	else if (test_opt(sb, JOURNAL_CHECKSUM))
+		seq_puts(seq, ",journal_checksum");
 	if (test_opt(sb, NOBH))
 		seq_puts(seq, ",nobh");
 	if (test_opt(sb, I_VERSION))
@@ -2213,7 +2216,7 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
 struct ext4_attr {
 	struct attribute attr;
 	ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
 	ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
 			 const char *, size_t);
 	int offset;
 };
@@ -2430,6 +2433,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	__releases(kernel_lock)
 	__acquires(kernel_lock)
 {
+	char *orig_data = kstrdup(data, GFP_KERNEL);
 	struct buffer_head *bh;
 	struct ext4_super_block *es = NULL;
 	struct ext4_sb_info *sbi;
@@ -2793,24 +2797,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	spin_lock_init(&sbi->s_next_gen_lock);

-	err = percpu_counter_init(&sbi->s_freeblocks_counter,
-			ext4_count_free_blocks(sb));
-	if (!err) {
-		err = percpu_counter_init(&sbi->s_freeinodes_counter,
-				ext4_count_free_inodes(sb));
-	}
-	if (!err) {
-		err = percpu_counter_init(&sbi->s_dirs_counter,
-				ext4_count_dirs(sb));
-	}
-	if (!err) {
-		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
-	}
-	if (err) {
-		ext4_msg(sb, KERN_ERR, "insufficient memory");
-		goto failed_mount3;
-	}
-
 	sbi->s_stripe = ext4_get_stripe_size(sbi);
 	sbi->s_max_writeback_mb_bump = 128;
@@ -2910,6 +2896,20 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);

 no_journal:
+	err = percpu_counter_init(&sbi->s_freeblocks_counter,
+				  ext4_count_free_blocks(sb));
+	if (!err)
+		err = percpu_counter_init(&sbi->s_freeinodes_counter,
+					  ext4_count_free_inodes(sb));
+	if (!err)
+		err = percpu_counter_init(&sbi->s_dirs_counter,
+					  ext4_count_dirs(sb));
+	if (!err)
+		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+	if (err) {
+		ext4_msg(sb, KERN_ERR, "insufficient memory");
+		goto failed_mount_wq;
+	}
 	if (test_opt(sb, NOBH)) {
 		if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
 			ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
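Net effect of the two hunks above: the percpu counters are now initialized only after journal recovery has run (at the no_journal: label), so s_freeblocks_counter and friends are seeded from a recovered, consistent superblock, and an allocation failure jumps to failed_mount_wq instead of failed_mount3. The init/destroy pairing follows the stock percpu_counter pattern, sketched here with an assumed throwaway counter:

static int counter_demo(void)
{
    struct percpu_counter c;
    int err;

    err = percpu_counter_init(&c, 0);    /* allocates per-CPU slots, can fail */
    if (err)
        return err;
    percpu_counter_add(&c, 42);          /* cheap, mostly per-CPU update */
    pr_info("approx %lld exact %lld\n",
            percpu_counter_read(&c),
            percpu_counter_sum_positive(&c));
    percpu_counter_destroy(&c);          /* must pair with every successful init */
    return 0;
}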
@@ -3001,7 +3001,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	err = ext4_setup_system_zone(sb);
 	if (err) {
 		ext4_msg(sb, KERN_ERR, "failed to initialize system "
-			 "zone (%d)\n", err);
+			 "zone (%d)", err);
 		goto failed_mount4;
 	}
@@ -3040,9 +3040,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	} else
 		descr = "out journal";

-	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr);
+	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
+		 "Opts: %s", descr, orig_data);

 	lock_kernel();
+	kfree(orig_data);
 	return 0;
 cantfind_ext4:
@@ -3059,6 +3061,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
 	}
+	percpu_counter_destroy(&sbi->s_freeblocks_counter);
+	percpu_counter_destroy(&sbi->s_freeinodes_counter);
+	percpu_counter_destroy(&sbi->s_dirs_counter);
+	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount3:
 	if (sbi->s_flex_groups) {
 		if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3066,10 +3072,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		else
 			kfree(sbi->s_flex_groups);
 	}
-	percpu_counter_destroy(&sbi->s_freeblocks_counter);
-	percpu_counter_destroy(&sbi->s_freeinodes_counter);
-	percpu_counter_destroy(&sbi->s_dirs_counter);
-	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
@@ -3089,6 +3091,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 	lock_kernel();
+	kfree(orig_data);
 	return ret;
 }
@@ -3380,7 +3383,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 	if (!(sb->s_flags & MS_RDONLY))
 		es->s_wtime = cpu_to_le32(get_seconds());
 	es->s_kbytes_written =
 		cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
 			    ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
 			      EXT4_SB(sb)->s_sectors_written_start) >> 1));
 	ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
@@ -3485,8 +3488,10 @@ int ext4_force_commit(struct super_block *sb)
 		return 0;

 	journal = EXT4_SB(sb)->s_journal;
-	if (journal)
+	if (journal) {
+		vfs_check_frozen(sb, SB_FREEZE_WRITE);
 		ret = ext4_journal_force_commit(journal);
+	}

 	return ret;
 }
@@ -3535,18 +3540,16 @@ static int ext4_freeze(struct super_block *sb)
 	 * the journal.
 	 */
 	error = jbd2_journal_flush(journal);
-	if (error < 0) {
-	out:
-		jbd2_journal_unlock_updates(journal);
-		return error;
-	}
+	if (error < 0)
+		goto out;

 	/* Journal blocked and flushed, clear needs_recovery flag. */
 	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 	error = ext4_commit_super(sb, 1);
-	if (error)
-		goto out;
-	return 0;
+out:
+	/* we rely on s_frozen to stop further updates */
+	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+	return error;
 }

 /*
@@ -3563,7 +3566,6 @@ static int ext4_unfreeze(struct super_block *sb)
 	EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 	ext4_commit_super(sb, 1);
 	unlock_super(sb);
-	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 	return 0;
 }
@@ -3580,6 +3582,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 #ifdef CONFIG_QUOTA
 	int i;
 #endif
+	char *orig_data = kstrdup(data, GFP_KERNEL);

 	lock_kernel();
@@ -3713,6 +3716,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 #endif
 	unlock_super(sb);
 	unlock_kernel();
+
+	ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
+	kfree(orig_data);
 	return 0;

 restore_opts:
@@ -3734,6 +3740,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 #endif
 	unlock_super(sb);
 	unlock_kernel();
+	kfree(orig_data);
 	return err;
 }
@@ -4141,6 +4148,7 @@ static int __init init_ext4_fs(void)
 {
 	int err;

+	ext4_check_flag_values();
 	err = init_ext4_system_zone();
 	if (err)
 		return err;
...
@@ -34,6 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
+	.setattr	= ext4_setattr,
 #ifdef CONFIG_EXT4_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
@@ -45,6 +46,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
 const struct inode_operations ext4_fast_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= ext4_follow_link,
+	.setattr	= ext4_setattr,
 #ifdef CONFIG_EXT4_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
...
@@ -228,9 +228,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
 		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
 	if (ext4_xattr_check_block(bh)) {
 bad_block:
-		ext4_error(inode->i_sb,
-			   "inode %lu: bad block %llu", inode->i_ino,
-			   EXT4_I(inode)->i_file_acl);
+		EXT4_ERROR_INODE(inode, "bad block %llu",
+				 EXT4_I(inode)->i_file_acl);
 		error = -EIO;
 		goto cleanup;
 	}
@@ -372,9 +371,8 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 	ea_bdebug(bh, "b_count=%d, refcount=%d",
 		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
 	if (ext4_xattr_check_block(bh)) {
-		ext4_error(inode->i_sb,
-			   "inode %lu: bad block %llu", inode->i_ino,
-			   EXT4_I(inode)->i_file_acl);
+		EXT4_ERROR_INODE(inode, "bad block %llu",
+				 EXT4_I(inode)->i_file_acl);
 		error = -EIO;
 		goto cleanup;
 	}
@@ -666,8 +664,8 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
 			atomic_read(&(bs->bh->b_count)),
 			le32_to_cpu(BHDR(bs->bh)->h_refcount));
 		if (ext4_xattr_check_block(bs->bh)) {
-			ext4_error(sb, "inode %lu: bad block %llu",
-				   inode->i_ino, EXT4_I(inode)->i_file_acl);
+			EXT4_ERROR_INODE(inode, "bad block %llu",
+					 EXT4_I(inode)->i_file_acl);
 			error = -EIO;
 			goto cleanup;
 		}
@@ -820,7 +818,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			EXT4_I(inode)->i_block_group);

 		/* non-extent files can't have physical blocks past 2^32 */
-		if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+		if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 			goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;

 		block = ext4_new_meta_blocks(handle, inode,
@@ -828,7 +826,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 		if (error)
 			goto cleanup;

-		if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+		if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 			BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);

 		ea_idebug(inode, "creating block %d", block);
@@ -880,8 +878,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 	goto cleanup;

 bad_block:
-	ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-		   inode->i_ino, EXT4_I(inode)->i_file_acl);
+	EXT4_ERROR_INODE(inode, "bad block %llu",
+			 EXT4_I(inode)->i_file_acl);
 	goto cleanup;

 #undef header
@@ -1194,8 +1192,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
 		if (!bh)
 			goto cleanup;
 		if (ext4_xattr_check_block(bh)) {
-			ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-				   inode->i_ino, EXT4_I(inode)->i_file_acl);
+			EXT4_ERROR_INODE(inode, "bad block %llu",
+					 EXT4_I(inode)->i_file_acl);
 			error = -EIO;
 			goto cleanup;
 		}
@@ -1372,14 +1370,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
 		goto cleanup;
 	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
 	if (!bh) {
-		ext4_error(inode->i_sb, "inode %lu: block %llu read error",
-			   inode->i_ino, EXT4_I(inode)->i_file_acl);
+		EXT4_ERROR_INODE(inode, "block %llu read error",
+				 EXT4_I(inode)->i_file_acl);
 		goto cleanup;
 	}
 	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
 	    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
-		ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-			   inode->i_ino, EXT4_I(inode)->i_file_acl);
+		EXT4_ERROR_INODE(inode, "bad block %llu",
+				 EXT4_I(inode)->i_file_acl);
 		goto cleanup;
 	}
 	ext4_xattr_release_block(handle, inode, bh);
@@ -1504,9 +1502,8 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
 		}
 		bh = sb_bread(inode->i_sb, ce->e_block);
 		if (!bh) {
-			ext4_error(inode->i_sb,
-				   "inode %lu: block %lu read error",
-				   inode->i_ino, (unsigned long) ce->e_block);
+			EXT4_ERROR_INODE(inode, "block %lu read error",
+					 (unsigned long) ce->e_block);
 		} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
 				EXT4_XATTR_REFCOUNT_MAX) {
 			ea_idebug(inode, "block %lu refcount %d>=%d",
...
@@ -1311,7 +1311,6 @@ int jbd2_journal_stop(handle_t *handle)
 	if (handle->h_sync)
 		transaction->t_synchronous_commit = 1;
 	current->journal_info = NULL;
-	spin_lock(&journal->j_state_lock);
 	spin_lock(&transaction->t_handle_lock);
 	transaction->t_outstanding_credits -= handle->h_buffer_credits;
 	transaction->t_updates--;
@@ -1340,8 +1339,7 @@ int jbd2_journal_stop(handle_t *handle)
 		jbd_debug(2, "transaction too old, requesting commit for "
 					"handle %p\n", handle);
 		/* This is non-blocking */
-		__jbd2_log_start_commit(journal, transaction->t_tid);
-		spin_unlock(&journal->j_state_lock);
+		jbd2_log_start_commit(journal, transaction->t_tid);

 		/*
 		 * Special case: JBD2_SYNC synchronous updates require us
@@ -1351,7 +1349,6 @@ int jbd2_journal_stop(handle_t *handle)
 			err = jbd2_log_wait_commit(journal, tid);
 	} else {
 		spin_unlock(&transaction->t_handle_lock);
-		spin_unlock(&journal->j_state_lock);
 	}

 	lock_map_release(&handle->h_lockdep_map);
...
@@ -1514,11 +1514,13 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
 /*
  * This operation can block, but only after everything is updated
  */
-int __dquot_alloc_space(struct inode *inode, qsize_t number,
-		int warn, int reserve)
+int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 {
 	int cnt, ret = 0;
 	char warntype[MAXQUOTAS];
+	int warn = flags & DQUOT_SPACE_WARN;
+	int reserve = flags & DQUOT_SPACE_RESERVE;
+	int nofail = flags & DQUOT_SPACE_NOFAIL;

 	/*
 	 * First test before acquiring mutex - solves deadlocks when we
@@ -1539,7 +1541,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 			continue;
 		ret = check_bdq(inode->i_dquot[cnt], number, !warn,
 				warntype+cnt);
-		if (ret) {
+		if (ret && !nofail) {
 			spin_unlock(&dq_data_lock);
 			goto out_flush_warn;
 		}
@@ -1638,10 +1640,11 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
 /*
  * This operation can block, but only after everything is updated
  */
-void __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
+void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
+	int reserve = flags & DQUOT_SPACE_RESERVE;

 	/* First test before acquiring mutex - solves deadlocks when we
 	 * re-enter the quota code and are already holding the mutex */
...
@@ -9,6 +9,10 @@
 #include <linux/fs.h>

+#define DQUOT_SPACE_WARN	0x1
+#define DQUOT_SPACE_RESERVE	0x2
+#define DQUOT_SPACE_NOFAIL	0x4
+
 static inline struct quota_info *sb_dqopt(struct super_block *sb)
 {
 	return &sb->s_dquot;
@@ -41,9 +45,8 @@ int dquot_scan_active(struct super_block *sb,
 struct dquot *dquot_alloc(struct super_block *sb, int type);
 void dquot_destroy(struct dquot *dquot);

-int __dquot_alloc_space(struct inode *inode, qsize_t number,
-		int warn, int reserve);
-void __dquot_free_space(struct inode *inode, qsize_t number, int reserve);
+int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags);
+void __dquot_free_space(struct inode *inode, qsize_t number, int flags);

 int dquot_alloc_inode(const struct inode *inode);
@@ -242,17 +245,17 @@ static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
 }

 static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
-		int warn, int reserve)
+		int flags)
 {
-	if (!reserve)
+	if (!(flags & DQUOT_SPACE_RESERVE))
 		inode_add_bytes(inode, number);
 	return 0;
 }

 static inline void __dquot_free_space(struct inode *inode, qsize_t number,
-		int reserve)
+		int flags)
 {
-	if (!reserve)
+	if (!(flags & DQUOT_SPACE_RESERVE))
 		inode_sub_bytes(inode, number);
 }
@@ -268,7 +271,13 @@ static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)

 static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
-	return __dquot_alloc_space(inode, nr, 1, 0);
+	return __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN);
+}
+
+static inline void dquot_alloc_space_nofail(struct inode *inode, qsize_t nr)
+{
+	__dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN|DQUOT_SPACE_NOFAIL);
+	mark_inode_dirty(inode);
 }
 static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
@@ -286,6 +295,11 @@ static inline int dquot_alloc_block_nodirty(struct inode *inode, qsize_t nr)
 	return dquot_alloc_space_nodirty(inode, nr << inode->i_blkbits);
 }

+static inline void dquot_alloc_block_nofail(struct inode *inode, qsize_t nr)
+{
+	dquot_alloc_space_nofail(inode, nr << inode->i_blkbits);
+}
+
 static inline int dquot_alloc_block(struct inode *inode, qsize_t nr)
 {
 	return dquot_alloc_space(inode, nr << inode->i_blkbits);
@@ -293,7 +307,7 @@ static inline int dquot_alloc_block(struct inode *inode, qsize_t nr)

 static inline int dquot_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0, 0);
+	return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0);
 }

 static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr)
@@ -308,7 +322,8 @@ static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr)

 static inline int dquot_reserve_block(struct inode *inode, qsize_t nr)
 {
-	return __dquot_alloc_space(inode, nr << inode->i_blkbits, 1, 1);
+	return __dquot_alloc_space(inode, nr << inode->i_blkbits,
+				   DQUOT_SPACE_WARN|DQUOT_SPACE_RESERVE);
 }

 static inline int dquot_claim_block(struct inode *inode, qsize_t nr)
@@ -345,7 +360,7 @@ static inline void dquot_free_block(struct inode *inode, qsize_t nr)
 static inline void dquot_release_reservation_block(struct inode *inode,
 		qsize_t nr)
 {
-	__dquot_free_space(inode, nr << inode->i_blkbits, 1);
+	__dquot_free_space(inode, nr << inode->i_blkbits, DQUOT_SPACE_RESERVE);
 }

 #endif /* _LINUX_QUOTAOPS_ */
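The quota interface above now takes a single flags word: DQUOT_SPACE_WARN controls quota warnings, DQUOT_SPACE_RESERVE distinguishes a reservation from a real allocation, and the new DQUOT_SPACE_NOFAIL makes a check_bdq() failure non-fatal so the accounting still happens. ext4 needs the nofail form when charging metadata blocks that were already reserved and therefore must not fail at allocation time. A sketch of typical callers, with hypothetical byte and block counts:

static void quota_flags_demo(struct inode *inode, qsize_t bytes, qsize_t nblocks)
{
    int ret;

    /* normal allocation: may fail with -EDQUOT, warns the user */
    ret = __dquot_alloc_space(inode, bytes, DQUOT_SPACE_WARN);

    /* delalloc-style reservation: claims quota but not i_bytes */
    ret = __dquot_alloc_space(inode, bytes,
                              DQUOT_SPACE_WARN | DQUOT_SPACE_RESERVE);
    if (!ret)
        __dquot_free_space(inode, bytes, DQUOT_SPACE_RESERVE);

    /* must-succeed accounting for already-reserved metadata blocks */
    dquot_alloc_block_nofail(inode, nblocks);
}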
@@ -353,7 +353,7 @@ TRACE_EVENT(ext4_discard_blocks,
 		  jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count)
 );

-TRACE_EVENT(ext4_mb_new_inode_pa,
+DECLARE_EVENT_CLASS(ext4__mb_new_pa,
 	TP_PROTO(struct ext4_allocation_context *ac,
 		 struct ext4_prealloc_space *pa),

@@ -381,32 +381,20 @@ TRACE_EVENT(ext4_mb_new_inode_pa,
 		  __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
 );

-TRACE_EVENT(ext4_mb_new_group_pa,
+DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa,

 	TP_PROTO(struct ext4_allocation_context *ac,
 		 struct ext4_prealloc_space *pa),

-	TP_ARGS(ac, pa),
+	TP_ARGS(ac, pa)
+);

-	TP_STRUCT__entry(
-		__field(	dev_t,	dev		)
-		__field(	ino_t,	ino		)
-		__field(	__u64,	pa_pstart	)
-		__field(	__u32,	pa_len		)
-		__field(	__u64,	pa_lstart	)
-	),
+DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa,

-	TP_fast_assign(
-		__entry->dev		= ac->ac_sb->s_dev;
-		__entry->ino		= ac->ac_inode->i_ino;
-		__entry->pa_pstart	= pa->pa_pstart;
-		__entry->pa_len		= pa->pa_len;
-		__entry->pa_lstart	= pa->pa_lstart;
-	),
+	TP_PROTO(struct ext4_allocation_context *ac,
+		 struct ext4_prealloc_space *pa),

-	TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
-		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-		  __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
+	TP_ARGS(ac, pa)
 );
 TRACE_EVENT(ext4_mb_release_inode_pa,
@@ -790,7 +778,7 @@ TRACE_EVENT(ext4_mballoc_prealloc,
 		  __entry->result_len, __entry->result_logical)
 );

-TRACE_EVENT(ext4_mballoc_discard,
+DECLARE_EVENT_CLASS(ext4__mballoc,
 	TP_PROTO(struct ext4_allocation_context *ac),

 	TP_ARGS(ac),

@@ -819,33 +807,18 @@ TRACE_EVENT(ext4_mballoc_discard,
 		  __entry->result_len, __entry->result_logical)
 );

-TRACE_EVENT(ext4_mballoc_free,
+DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard,

 	TP_PROTO(struct ext4_allocation_context *ac),

-	TP_ARGS(ac),
+	TP_ARGS(ac)
+);

-	TP_STRUCT__entry(
-		__field(	dev_t,	dev		)
-		__field(	ino_t,	ino		)
-		__field(	__u32,	result_logical	)
-		__field(	int,	result_start	)
-		__field(	__u32,	result_group	)
-		__field(	int,	result_len	)
-	),
+DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free,

-	TP_fast_assign(
-		__entry->dev		= ac->ac_inode->i_sb->s_dev;
-		__entry->ino		= ac->ac_inode->i_ino;
-		__entry->result_logical	= ac->ac_b_ex.fe_logical;
-		__entry->result_start	= ac->ac_b_ex.fe_start;
-		__entry->result_group	= ac->ac_b_ex.fe_group;
-		__entry->result_len	= ac->ac_b_ex.fe_len;
-	),
+	TP_PROTO(struct ext4_allocation_context *ac),

-	TP_printk("dev %s inode %lu extent %u/%d/%u@%u ",
-		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-		  __entry->result_group, __entry->result_start,
-		  __entry->result_len, __entry->result_logical)
+	TP_ARGS(ac)
 );
 TRACE_EVENT(ext4_forget,
@@ -974,6 +947,39 @@ TRACE_EVENT(ext4_da_release_space,
 		  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
 );

+DECLARE_EVENT_CLASS(ext4__bitmap_load,
+	TP_PROTO(struct super_block *sb, unsigned long group),
+
+	TP_ARGS(sb, group),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev	)
+		__field(	__u32,	group	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->group	= group;
+	),
+
+	TP_printk("dev %s group %u",
+		  jbd2_dev_to_name(__entry->dev), __entry->group)
+);
+
+DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load,
+
+	TP_PROTO(struct super_block *sb, unsigned long group),
+
+	TP_ARGS(sb, group)
+);
+
+DEFINE_EVENT(ext4__bitmap_load, ext4_mb_buddy_bitmap_load,
+
+	TP_PROTO(struct super_block *sb, unsigned long group),
+
+	TP_ARGS(sb, group)
+);
+
 #endif /* _TRACE_EXT4_H */
...
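DECLARE_EVENT_CLASS factors the shared TP_STRUCT__entry / TP_fast_assign / TP_printk boilerplate into one event class, and each DEFINE_EVENT then stamps out an event reusing that layout, which is why the hunks above shrink the header while adding two events. Call sites are unchanged in style: they fire the generated trace_<name>() stub. A sketch of firing the new bitmap-load tracepoint from a hypothetical mballoc path:

static int load_group_bitmap(struct super_block *sb, ext4_group_t group)
{
    trace_ext4_mb_bitmap_load(sb, group);   /* stub generated by DEFINE_EVENT */
    /* ... actually read the on-disk block bitmap for 'group' ... */
    return 0;
}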