提交 6515925b 编写于 作者: L Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Theodore Ts'o:
 "The one new feature added in this patch series is the ability to use
  the "punch hole" functionality for inodes that are not using extent
  maps.

  In the bug fix category, we fixed some races in the AIO and fstrim
  code, and some potential NULL pointer dereferences and memory leaks in
  error handling code paths.

  In the optimization category, we fixed a performance regression in the
  jbd2 layer introduced by commit d9b01934 ("jbd: fix fsync() tid
  wraparound bug", introduced in v3.0) which shows up in the AIM7
  benchmark.  We also further optimized jbd2 by minimize the amount of
  time that transaction handles are held active.

  This patch series also features some additional enhancement of the
  extent status tree, which is now used to cache extent information in a
  more efficient/compact form than what we use on-disk."

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (65 commits)
  ext4: fix free clusters calculation in bigalloc filesystem
  ext4: no need to remove extent if len is 0 in ext4_es_remove_extent()
  ext4: fix xattr block allocation/release with bigalloc
  ext4: reclaim extents from extent status tree
  ext4: adjust some functions for reclaiming extents from extent status tree
  ext4: remove single extent cache
  ext4: lookup block mapping in extent status tree
  ext4: track all extent status in extent status tree
  ext4: let ext4_ext_map_blocks return EXT4_MAP_UNWRITTEN flag
  ext4: rename and improbe ext4_es_find_extent()
  ext4: add physical block and status member into extent status tree
  ext4: refine extent status tree
  ext4: use ERR_PTR() abstraction for ext4_append()
  ext4: refactor code to read directory blocks into ext4_read_dirblock()
  ext4: add debugging context for warning in ext4_da_update_reserve_space()
  ext4: use KERN_WARNING for warning messages
  jbd2: use module parameters instead of debugfs for jbd_debug
  ext4: use module parameters instead of debugfs for mballoc_debug
  ext4: start handle at the last possible moment when creating inodes
  ext4: fix the number of credits needed for acl ops with inline data
  ...
......@@ -324,8 +324,8 @@ ext4_acl_chmod(struct inode *inode)
if (error)
return error;
retry:
handle = ext4_journal_start(inode,
EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
handle = ext4_journal_start(inode, EXT4_HT_XATTR,
ext4_jbd2_credits_xattr(inode));
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
ext4_std_error(inode->i_sb, error);
......@@ -422,7 +422,8 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
acl = NULL;
retry:
handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
handle = ext4_journal_start(inode, EXT4_HT_XATTR,
ext4_jbd2_credits_xattr(inode));
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto release_and_out;
......
......@@ -358,7 +358,7 @@ void ext4_validate_block_bitmap(struct super_block *sb,
}
/**
* ext4_read_block_bitmap()
* ext4_read_block_bitmap_nowait()
* @sb: super block
* @block_group: given block group
*
......@@ -457,6 +457,8 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
struct buffer_head *bh;
bh = ext4_read_block_bitmap_nowait(sb, block_group);
if (!bh)
return NULL;
if (ext4_wait_block_bitmap(sb, block_group, bh)) {
put_bh(bh);
return NULL;
......@@ -482,11 +484,16 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
free_clusters = percpu_counter_read_positive(fcc);
dirty_clusters = percpu_counter_read_positive(dcc);
root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
/*
* r_blocks_count should always be multiple of the cluster ratio so
* we are safe to do a plane bit shift only.
*/
root_clusters = ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
EXT4_FREECLUSTERS_WATERMARK) {
free_clusters = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
free_clusters = percpu_counter_sum_positive(fcc);
dirty_clusters = percpu_counter_sum_positive(dcc);
}
/* Check whether we have space after accounting for current
......
......@@ -185,6 +185,7 @@ static int ext4_readdir(struct file *filp,
"at offset %llu",
(unsigned long long)filp->f_pos);
filp->f_pos += sb->s_blocksize - offset;
brelse(bh);
continue;
}
set_buffer_verified(bh);
......
......@@ -194,8 +194,7 @@ struct mpage_da_data {
*/
#define EXT4_IO_END_UNWRITTEN 0x0001
#define EXT4_IO_END_ERROR 0x0002
#define EXT4_IO_END_QUEUED 0x0004
#define EXT4_IO_END_DIRECT 0x0008
#define EXT4_IO_END_DIRECT 0x0004
struct ext4_io_page {
struct page *p_page;
......@@ -215,10 +214,8 @@ typedef struct ext4_io_end {
struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */
unsigned int flag; /* unwritten or not */
struct page *page; /* for writepage() path */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
struct work_struct work; /* data work queue */
struct kiocb *iocb; /* iocb struct for AIO */
int result; /* error value for AIO */
int num_io_pages; /* for writepages() */
......@@ -582,6 +579,8 @@ enum {
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
/* Do not take i_data_sem locking in ext4_map_blocks */
#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
/* Do not put hole in extent cache */
#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
/*
* Flags used by ext4_free_blocks
......@@ -810,17 +809,6 @@ do { \
#endif /* defined(__KERNEL__) || defined(__linux__) */
/*
* storage for cached extent
* If ec_len == 0, then the cache is invalid.
* If ec_start == 0, then the cache represents a gap (null mapping)
*/
struct ext4_ext_cache {
ext4_fsblk_t ec_start;
ext4_lblk_t ec_block;
__u32 ec_len; /* must be 32bit to return holes */
};
#include "extents_status.h"
/*
......@@ -887,7 +875,6 @@ struct ext4_inode_info {
struct inode vfs_inode;
struct jbd2_inode *jinode;
struct ext4_ext_cache i_cached_extent;
/*
* File creation time. Its function is same as that of
* struct timespec i_{a,c,m}time in the generic inode.
......@@ -901,6 +888,8 @@ struct ext4_inode_info {
/* extents status tree */
struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock;
struct list_head i_es_lru;
unsigned int i_es_lru_nr; /* protected by i_es_lock */
/* ialloc */
ext4_group_t i_last_alloc_group;
......@@ -930,6 +919,7 @@ struct ext4_inode_info {
spinlock_t i_completed_io_lock;
atomic_t i_ioend_count; /* Number of outstanding io_end structs */
atomic_t i_unwritten; /* Nr. of inflight conversions pending */
struct work_struct i_unwritten_work; /* deferred extent conversion */
spinlock_t i_block_reservation_lock;
......@@ -985,7 +975,6 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
......@@ -1316,6 +1305,11 @@ struct ext4_sb_info {
/* Precomputed FS UUID checksum for seeding other checksums */
__u32 s_csum_seed;
/* Reclaim extents from extent status tree */
struct shrinker s_es_shrinker;
struct list_head s_es_lru;
spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
......@@ -2007,9 +2001,20 @@ extern int ext4fs_dirhash(const char *name, int len, struct
dx_hash_info *hinfo);
/* ialloc.c */
extern struct inode *ext4_new_inode(handle_t *, struct inode *, umode_t,
const struct qstr *qstr, __u32 goal,
uid_t *owner);
extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
const struct qstr *qstr, __u32 goal,
uid_t *owner, int handle_type,
unsigned int line_no, int nblocks);
#define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \
__ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \
0, 0, 0)
#define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \
type, nblocks) \
__ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \
(type), __LINE__, (nblocks))
extern void ext4_free_inode(handle_t *, struct inode *);
extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
......@@ -2103,6 +2108,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
extern void ext4_ind_truncate(struct inode *inode);
extern int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length);
/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
......@@ -2151,6 +2157,8 @@ extern void *ext4_kvzalloc(size_t size, gfp_t flags);
extern void ext4_kvfree(void *ptr);
extern int ext4_alloc_flex_bg_array(struct super_block *sb,
ext4_group_t ngroup);
extern const char *ext4_decode_error(struct super_block *sb, int errno,
char nbuf[16]);
extern __printf(4, 5)
void __ext4_error(struct super_block *, const char *, unsigned int,
const char *, ...);
......@@ -2227,6 +2235,8 @@ extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group,
struct ext4_group_desc *gdp);
extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
struct ext4_group_desc *gdp);
extern int ext4_register_li_request(struct super_block *sb,
ext4_group_t first_not_zeroed);
static inline int ext4_has_group_desc_csum(struct super_block *sb)
{
......@@ -2454,6 +2464,75 @@ extern const struct file_operations ext4_file_operations;
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
extern void ext4_unwritten_wait(struct inode *inode);
/* inline.c */
extern int ext4_has_inline_data(struct inode *inode);
extern int ext4_get_inline_size(struct inode *inode);
extern int ext4_get_max_inline_size(struct inode *inode);
extern int ext4_find_inline_data_nolock(struct inode *inode);
extern void ext4_write_inline_data(struct inode *inode,
struct ext4_iloc *iloc,
void *buffer, loff_t pos,
unsigned int len);
extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
unsigned int len);
extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
unsigned int len);
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
extern int ext4_readpage_inline(struct inode *inode, struct page *page);
extern int ext4_try_to_write_inline_data(struct address_space *mapping,
struct inode *inode,
loff_t pos, unsigned len,
unsigned flags,
struct page **pagep);
extern int ext4_write_inline_data_end(struct inode *inode,
loff_t pos, unsigned len,
unsigned copied,
struct page *page);
extern struct buffer_head *
ext4_journalled_write_inline_data(struct inode *inode,
unsigned len,
struct page *page);
extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
struct inode *inode,
loff_t pos, unsigned len,
unsigned flags,
struct page **pagep,
void **fsdata);
extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
unsigned len, unsigned copied,
struct page *page);
extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode);
extern int ext4_try_create_inline_dir(handle_t *handle,
struct inode *parent,
struct inode *inode);
extern int ext4_read_inline_dir(struct file *filp,
void *dirent, filldir_t filldir,
int *has_inline_data);
extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir,
int *has_inline_data);
extern int ext4_delete_inline_entry(handle_t *handle,
struct inode *dir,
struct ext4_dir_entry_2 *de_del,
struct buffer_head *bh,
int *has_inline_data);
extern int empty_inline_dir(struct inode *dir, int *has_inline_data);
extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
struct ext4_dir_entry_2 **parent_de,
int *retval);
extern int ext4_inline_data_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
int *has_inline);
extern int ext4_try_to_evict_inline_data(handle_t *handle,
struct inode *inode,
int needed);
extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
extern int ext4_convert_inline_data(struct inode *inode);
/* namei.c */
extern const struct inode_operations ext4_dir_inode_operations;
extern const struct inode_operations ext4_special_inode_operations;
......@@ -2520,6 +2599,9 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path *);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
extern int ext4_find_delalloc_range(struct inode *inode,
ext4_lblk_t lblk_start,
ext4_lblk_t lblk_end);
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
......@@ -2537,6 +2619,7 @@ extern void ext4_exit_pageio(void);
extern void ext4_ioend_wait(struct inode *);
extern void ext4_free_io_end(ext4_io_end_t *io);
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
extern void ext4_end_io_work(struct work_struct *work);
extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
......
......@@ -193,12 +193,6 @@ static inline unsigned short ext_depth(struct inode *inode)
return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
}
static inline void
ext4_ext_invalidate_cache(struct inode *inode)
{
EXT4_I(inode)->i_cached_extent.ec_len = 0;
}
static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
{
/* We can not have an uninitialized extent of zero length! */
......
......@@ -6,6 +6,108 @@
#include <trace/events/ext4.h>
/* Just increment the non-pointer handle value */
static handle_t *ext4_get_nojournal(void)
{
handle_t *handle = current->journal_info;
unsigned long ref_cnt = (unsigned long)handle;
BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
ref_cnt++;
handle = (handle_t *)ref_cnt;
current->journal_info = handle;
return handle;
}
/* Decrement the non-pointer handle value */
static void ext4_put_nojournal(handle_t *handle)
{
unsigned long ref_cnt = (unsigned long)handle;
BUG_ON(ref_cnt == 0);
ref_cnt--;
handle = (handle_t *)ref_cnt;
current->journal_info = handle;
}
/*
* Wrappers for jbd2_journal_start/end.
*/
handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
int type, int nblocks)
{
journal_t *journal;
trace_ext4_journal_start(sb, nblocks, _RET_IP_);
if (sb->s_flags & MS_RDONLY)
return ERR_PTR(-EROFS);
WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
journal = EXT4_SB(sb)->s_journal;
if (!journal)
return ext4_get_nojournal();
/*
* Special case here: if the journal has aborted behind our
* backs (eg. EIO in the commit thread), then we still need to
* take the FS itself readonly cleanly.
*/
if (is_journal_aborted(journal)) {
ext4_abort(sb, "Detected aborted journal");
return ERR_PTR(-EROFS);
}
return jbd2__journal_start(journal, nblocks, GFP_NOFS, type, line);
}
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
{
struct super_block *sb;
int err;
int rc;
if (!ext4_handle_valid(handle)) {
ext4_put_nojournal(handle);
return 0;
}
sb = handle->h_transaction->t_journal->j_private;
err = handle->h_err;
rc = jbd2_journal_stop(handle);
if (!err)
err = rc;
if (err)
__ext4_std_error(sb, where, line, err);
return err;
}
void ext4_journal_abort_handle(const char *caller, unsigned int line,
const char *err_fn, struct buffer_head *bh,
handle_t *handle, int err)
{
char nbuf[16];
const char *errstr = ext4_decode_error(NULL, err, nbuf);
BUG_ON(!ext4_handle_valid(handle));
if (bh)
BUFFER_TRACE(bh, "abort");
if (!handle->h_err)
handle->h_err = err;
if (is_handle_aborted(handle))
return;
printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
caller, line, errstr, err_fn);
jbd2_journal_abort_handle(handle);
}
int __ext4_journal_get_write_access(const char *where, unsigned int line,
handle_t *handle, struct buffer_head *bh)
{
......
......@@ -59,12 +59,6 @@
#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
/* Delete operations potentially hit one directory's namespace plus an
* entire inode, plus arbitrary amounts of bitmap/indirection data. Be
* generous. We can grow the delete transaction later if necessary. */
#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64)
/* Define an arbitrary limit for the amount of data we will anticipate
* writing to any given transaction. For unbounded transactions such as
* write(2) and truncate(2) we can write more than this, but we always
......@@ -110,6 +104,36 @@
#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
static inline int ext4_jbd2_credits_xattr(struct inode *inode)
{
int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
/*
* In case of inline data, we may push out the data to a block,
* so we need to reserve credits for this eventuality
*/
if (ext4_has_inline_data(inode))
credits += ext4_writepage_trans_blocks(inode) + 1;
return credits;
}
/*
* Ext4 handle operation types -- for logging purposes
*/
#define EXT4_HT_MISC 0
#define EXT4_HT_INODE 1
#define EXT4_HT_WRITE_PAGE 2
#define EXT4_HT_MAP_BLOCKS 3
#define EXT4_HT_DIR 4
#define EXT4_HT_TRUNCATE 5
#define EXT4_HT_QUOTA 6
#define EXT4_HT_RESIZE 7
#define EXT4_HT_MIGRATE 8
#define EXT4_HT_MOVE_EXTENTS 9
#define EXT4_HT_XATTR 10
#define EXT4_HT_MAX 11
/**
* struct ext4_journal_cb_entry - Base structure for callback information.
*
......@@ -234,7 +258,8 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
#define ext4_handle_dirty_super(handle, sb) \
__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
int type, int nblocks);
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
......@@ -268,9 +293,17 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
return 1;
}
static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
#define ext4_journal_start_sb(sb, type, nblocks) \
__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks))
#define ext4_journal_start(inode, type, nblocks) \
__ext4_journal_start((inode), __LINE__, (type), (nblocks))
static inline handle_t *__ext4_journal_start(struct inode *inode,
unsigned int line, int type,
int nblocks)
{
return ext4_journal_start_sb(inode->i_sb, nblocks);
return __ext4_journal_start_sb(inode->i_sb, line, type, nblocks);
}
#define ext4_journal_stop(handle) \
......
此差异已折叠。
此差异已折叠。
......@@ -20,10 +20,21 @@
#define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
#define EXTENT_STATUS_WRITTEN 0x80000000 /* written extent */
#define EXTENT_STATUS_UNWRITTEN 0x40000000 /* unwritten extent */
#define EXTENT_STATUS_DELAYED 0x20000000 /* delayed extent */
#define EXTENT_STATUS_HOLE 0x10000000 /* hole */
#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
EXTENT_STATUS_UNWRITTEN | \
EXTENT_STATUS_DELAYED | \
EXTENT_STATUS_HOLE)
struct extent_status {
struct rb_node rb_node;
ext4_lblk_t start; /* first block extent covers */
ext4_lblk_t len; /* length of extent in block */
ext4_lblk_t es_lblk; /* first logical block extent covers */
ext4_lblk_t es_len; /* length of extent in block */
ext4_fsblk_t es_pblk; /* first physical block */
};
struct ext4_es_tree {
......@@ -35,11 +46,69 @@ extern int __init ext4_init_es(void);
extern void ext4_exit_es(void);
extern void ext4_es_init_tree(struct ext4_es_tree *tree);
extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t start,
extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
unsigned long long status);
extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len);
extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t start,
ext4_lblk_t len);
extern ext4_lblk_t ext4_es_find_extent(struct inode *inode,
struct extent_status *es);
extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status *es);
extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status *es);
static inline int ext4_es_is_written(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_WRITTEN);
}
static inline int ext4_es_is_unwritten(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_UNWRITTEN);
}
static inline int ext4_es_is_delayed(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_DELAYED);
}
static inline int ext4_es_is_hole(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_HOLE);
}
static inline ext4_fsblk_t ext4_es_status(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_FLAGS);
}
static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
{
return (es->es_pblk & ~EXTENT_STATUS_FLAGS);
}
static inline void ext4_es_store_pblock(struct extent_status *es,
ext4_fsblk_t pb)
{
ext4_fsblk_t block;
block = (pb & ~EXTENT_STATUS_FLAGS) |
(es->es_pblk & EXTENT_STATUS_FLAGS);
es->es_pblk = block;
}
static inline void ext4_es_store_status(struct extent_status *es,
unsigned long long status)
{
ext4_fsblk_t block;
block = (status & EXTENT_STATUS_FLAGS) |
(es->es_pblk & ~EXTENT_STATUS_FLAGS);
es->es_pblk = block;
}
extern void ext4_es_register_shrinker(struct super_block *sb);
extern void ext4_es_unregister_shrinker(struct super_block *sb);
extern void ext4_es_lru_add(struct inode *inode);
extern void ext4_es_lru_del(struct inode *inode);
#endif /* _EXT4_EXTENTS_STATUS_H */
......@@ -240,7 +240,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
handle_t *handle;
int err;
handle = ext4_journal_start_sb(sb, 1);
handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
err = ext4_journal_get_write_access(handle, sbi->s_sbh);
......@@ -464,10 +464,8 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
* If there is a delay extent at this offset,
* it will be as a data.
*/
es.start = last;
(void)ext4_es_find_extent(inode, &es);
if (last >= es.start &&
last < es.start + es.len) {
ext4_es_find_delayed_extent(inode, last, &es);
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
if (last != start)
dataoff = last << blkbits;
break;
......@@ -549,11 +547,9 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
* If there is a delay extent at this offset,
* we will skip this extent.
*/
es.start = last;
(void)ext4_es_find_extent(inode, &es);
if (last >= es.start &&
last < es.start + es.len) {
last = es.start + es.len;
ext4_es_find_delayed_extent(inode, last, &es);
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
last = es.es_lblk + es.es_len;
holeoff = last << blkbits;
continue;
}
......
......@@ -155,11 +155,11 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
/* Check to see if the seed is all zero's */
if (hinfo->seed) {
for (i = 0; i < 4; i++) {
if (hinfo->seed[i])
if (hinfo->seed[i]) {
memcpy(buf, hinfo->seed, sizeof(buf));
break;
}
}
if (i < 4)
memcpy(buf, hinfo->seed, sizeof(buf));
}
switch (hinfo->hash_version) {
......
......@@ -634,8 +634,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
* For other inodes, search forward from the parent directory's block
* group to find a free inode.
*/
struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
const struct qstr *qstr, __u32 goal, uid_t *owner)
struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
umode_t mode, const struct qstr *qstr,
__u32 goal, uid_t *owner, int handle_type,
unsigned int line_no, int nblocks)
{
struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL;
......@@ -725,6 +727,15 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
"inode=%lu", ino + 1);
continue;
}
if (!handle) {
BUG_ON(nblocks <= 0);
handle = __ext4_journal_start_sb(dir->i_sb, line_no,
handle_type, nblocks);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto fail;
}
}
BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
if (err)
......@@ -1017,17 +1028,17 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
inode = NULL;
bad_orphan:
ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n",
bit, (unsigned long long)bitmap_bh->b_blocknr,
ext4_test_bit(bit, bitmap_bh->b_data));
printk(KERN_NOTICE "inode=%p\n", inode);
printk(KERN_WARNING "inode=%p\n", inode);
if (inode) {
printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
printk(KERN_WARNING "is_bad_inode(inode)=%d\n",
is_bad_inode(inode));
printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n",
NEXT_ORPHAN(inode));
printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
printk(KERN_WARNING "max_ino=%lu\n", max_ino);
printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink);
/* Avoid freeing blocks if we got a bad deleted inode */
if (inode->i_nlink == 0)
inode->i_blocks = 0;
......@@ -1137,7 +1148,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
goto out;
handle = ext4_journal_start_sb(sb, 1);
handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
......
......@@ -146,6 +146,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
struct super_block *sb = inode->i_sb;
Indirect *p = chain;
struct buffer_head *bh;
int ret = -EIO;
*err = 0;
/* i_data is not going away, no lock needed */
......@@ -154,8 +155,10 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
goto no_block;
while (--depth) {
bh = sb_getblk(sb, le32_to_cpu(p->key));
if (unlikely(!bh))
if (unlikely(!bh)) {
ret = -ENOMEM;
goto failure;
}
if (!bh_uptodate_or_lock(bh)) {
if (bh_submit_read(bh) < 0) {
......@@ -177,7 +180,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
return NULL;
failure:
*err = -EIO;
*err = ret;
no_block:
return p;
}
......@@ -355,9 +358,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
* for the first direct block
*/
new_blocks[index] = current_block;
printk(KERN_INFO "%s returned more blocks than "
WARN(1, KERN_INFO "%s returned more blocks than "
"requested\n", __func__);
WARN_ON(1);
break;
}
}
......@@ -471,7 +473,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
*/
bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
if (unlikely(!bh)) {
err = -EIO;
err = -ENOMEM;
goto failed;
}
......@@ -789,7 +791,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
if (final_size > inode->i_size) {
/* Credits for sb + inode write */
handle = ext4_journal_start(inode, 2);
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
......@@ -849,7 +851,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
int err;
/* Credits for sb + inode write */
handle = ext4_journal_start(inode, 2);
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle)) {
/* This is really bad luck. We've written the data
* but cannot extend i_size. Bail out and pretend
......@@ -948,7 +950,8 @@ static handle_t *start_transaction(struct inode *inode)
{
handle_t *result;
result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode));
result = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
ext4_blocks_for_truncate(inode));
if (!IS_ERR(result))
return result;
......@@ -1515,3 +1518,243 @@ void ext4_ind_truncate(struct inode *inode)
trace_ext4_truncate_exit(inode);
}
static int free_hole_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *parent_bh, __le32 *i_data,
int level, ext4_lblk_t first,
ext4_lblk_t count, int max)
{
struct buffer_head *bh = NULL;
int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
int ret = 0;
int i, inc;
ext4_lblk_t offset;
__le32 blk;
inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level);
for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) {
if (offset >= count + first)
break;
if (*i_data == 0 || (offset + inc) <= first)
continue;
blk = *i_data;
if (level > 0) {
ext4_lblk_t first2;
bh = sb_bread(inode->i_sb, blk);
if (!bh) {
EXT4_ERROR_INODE_BLOCK(inode, blk,
"Read failure");
return -EIO;
}
first2 = (first > offset) ? first - offset : 0;
ret = free_hole_blocks(handle, inode, bh,
(__le32 *)bh->b_data, level - 1,
first2, count - offset,
inode->i_sb->s_blocksize >> 2);
if (ret) {
brelse(bh);
goto err;
}
}
if (level == 0 ||
(bh && all_zeroes((__le32 *)bh->b_data,
(__le32 *)bh->b_data + addr_per_block))) {
ext4_free_data(handle, inode, parent_bh, &blk, &blk+1);
*i_data = 0;
}
brelse(bh);
bh = NULL;
}
err:
return ret;
}
static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t first, ext4_lblk_t stop)
{
int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
int level, ret = 0;
int num = EXT4_NDIR_BLOCKS;
ext4_lblk_t count, max = EXT4_NDIR_BLOCKS;
__le32 *i_data = EXT4_I(inode)->i_data;
count = stop - first;
for (level = 0; level < 4; level++, max *= addr_per_block) {
if (first < max) {
ret = free_hole_blocks(handle, inode, NULL, i_data,
level, first, count, num);
if (ret)
goto err;
if (count > max - first)
count -= max - first;
else
break;
first = 0;
} else {
first -= max;
}
i_data += num;
if (level == 0) {
num = 1;
max = 1;
}
}
err:
return ret;
}
int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct super_block *sb = inode->i_sb;
ext4_lblk_t first_block, stop_block;
struct address_space *mapping = inode->i_mapping;
handle_t *handle = NULL;
loff_t first_page, last_page, page_len;
loff_t first_page_offset, last_page_offset;
int err = 0;
/*
* Write out all dirty pages to avoid race conditions
* Then release them.
*/
if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
err = filemap_write_and_wait_range(mapping,
offset, offset + length - 1);
if (err)
return err;
}
mutex_lock(&inode->i_mutex);
/* It's not possible punch hole on append only file */
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
err = -EPERM;
goto out_mutex;
}
if (IS_SWAPFILE(inode)) {
err = -ETXTBSY;
goto out_mutex;
}
/* No need to punch hole beyond i_size */
if (offset >= inode->i_size)
goto out_mutex;
/*
* If the hole extents beyond i_size, set the hole
* to end after the page that contains i_size
*/
if (offset + length > inode->i_size) {
length = inode->i_size +
PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
offset;
}
first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
last_page = (offset + length) >> PAGE_CACHE_SHIFT;
first_page_offset = first_page << PAGE_CACHE_SHIFT;
last_page_offset = last_page << PAGE_CACHE_SHIFT;
/* Now release the pages */
if (last_page_offset > first_page_offset) {
truncate_pagecache_range(inode, first_page_offset,
last_page_offset - 1);
}
/* Wait all existing dio works, newcomers will block on i_mutex */
inode_dio_wait(inode);
handle = start_transaction(inode);
if (IS_ERR(handle))
goto out_mutex;
/*
* Now we need to zero out the non-page-aligned data in the
* pages at the start and tail of the hole, and unmap the buffer
* heads for the block aligned regions of the page that were
* completely zerod.
*/
if (first_page > last_page) {
/*
* If the file space being truncated is contained within a page
* just zero out and unmap the middle of that page
*/
err = ext4_discard_partial_page_buffers(handle,
mapping, offset, length, 0);
if (err)
goto out;
} else {
/*
* Zero out and unmap the paritial page that contains
* the start of the hole
*/
page_len = first_page_offset - offset;
if (page_len > 0) {
err = ext4_discard_partial_page_buffers(handle, mapping,
offset, page_len, 0);
if (err)
goto out;
}
/*
* Zero out and unmap the partial page that contains
* the end of the hole
*/
page_len = offset + length - last_page_offset;
if (page_len > 0) {
err = ext4_discard_partial_page_buffers(handle, mapping,
last_page_offset, page_len, 0);
if (err)
goto out;
}
}
/*
* If i_size contained in the last page, we need to
* unmap and zero the paritial page after i_size
*/
if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
inode->i_size % PAGE_CACHE_SIZE != 0) {
page_len = PAGE_CACHE_SIZE -
(inode->i_size & (PAGE_CACHE_SIZE - 1));
if (page_len > 0) {
err = ext4_discard_partial_page_buffers(handle,
mapping, inode->i_size, page_len, 0);
if (err)
goto out;
}
}
first_block = (offset + sb->s_blocksize - 1) >>
EXT4_BLOCK_SIZE_BITS(sb);
stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
if (first_block >= stop_block)
goto out;
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode);
err = ext4_es_remove_extent(inode, first_block,
stop_block - first_block);
err = ext4_free_hole_blocks(handle, inode, first_block, stop_block);
ext4_discard_preallocations(inode);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
up_write(&EXT4_I(inode)->i_data_sem);
out:
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
out_mutex:
mutex_unlock(&inode->i_mutex);
return err;
}
......@@ -545,7 +545,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
return ret;
retry:
handle = ext4_journal_start(inode, needed_blocks);
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
handle = NULL;
......@@ -657,7 +657,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
* The possible write could happen in the inode,
* so try to reserve the space in inode first.
*/
handle = ext4_journal_start(inode, 1);
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
handle = NULL;
......@@ -853,7 +853,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
if (ret)
return ret;
handle = ext4_journal_start(inode, 1);
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
handle = NULL;
......@@ -1188,7 +1188,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
data_bh = sb_getblk(inode->i_sb, map.m_pblk);
if (!data_bh) {
error = -EIO;
error = -ENOMEM;
goto out_restore;
}
......@@ -1770,7 +1770,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
needed_blocks = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, needed_blocks);
handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks);
if (IS_ERR(handle))
return;
......@@ -1862,7 +1862,7 @@ int ext4_convert_inline_data(struct inode *inode)
if (error)
return error;
handle = ext4_journal_start(inode, needed_blocks);
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto out_free;
......
此差异已折叠。
......@@ -104,7 +104,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
} else if (oldflags & EXT4_EOFBLOCKS_FL)
ext4_truncate(inode);
handle = ext4_journal_start(inode, 1);
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto flags_out;
......@@ -173,7 +173,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
mutex_lock(&inode->i_mutex);
handle = ext4_journal_start(inode, 1);
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto unlock_out;
......@@ -313,6 +313,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err == 0)
err = err2;
mnt_drop_write_file(filp);
if (!err && ext4_has_group_desc_csum(sb) &&
test_opt(sb, INIT_INODE_TABLE))
err = ext4_register_li_request(sb, input.group);
group_add_out:
ext4_resize_end(sb);
return err;
......@@ -358,6 +361,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
ext4_fsblk_t n_blocks_count;
struct super_block *sb = inode->i_sb;
int err = 0, err2 = 0;
ext4_group_t o_group = EXT4_SB(sb)->s_groups_count;
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
......@@ -388,6 +392,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err == 0)
err = err2;
mnt_drop_write_file(filp);
if (!err && (o_group > EXT4_SB(sb)->s_groups_count) &&
ext4_has_group_desc_csum(sb) &&
test_opt(sb, INIT_INODE_TABLE))
err = ext4_register_li_request(sb, o_group);
resizefs_out:
ext4_resize_end(sb);
return err;
......
......@@ -23,11 +23,18 @@
#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/debugfs.h>
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/ext4.h>
#ifdef CONFIG_EXT4_DEBUG
ushort ext4_mballoc_debug __read_mostly;
module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
#endif
/*
* MUSTDO:
* - test ext4_ext_search_left() and ext4_ext_search_right()
......@@ -1884,15 +1891,19 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
case 0:
BUG_ON(ac->ac_2order == 0);
if (grp->bb_largest_free_order < ac->ac_2order)
return 0;
/* Avoid using the first bg of a flexgroup for data files */
if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
(flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
((group % flex_size) == 0))
return 0;
if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
(free / fragments) >= ac->ac_g_ex.fe_len)
return 1;
if (grp->bb_largest_free_order < ac->ac_2order)
return 0;
return 1;
case 1:
if ((free / fragments) >= ac->ac_g_ex.fe_len)
......@@ -2007,7 +2018,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
}
ac->ac_groups_scanned++;
if (cr == 0)
if (cr == 0 && ac->ac_2order < sb->s_blocksize_bits+2)
ext4_mb_simple_scan_group(ac, &e4b);
else if (cr == 1 && sbi->s_stripe &&
!(ac->ac_g_ex.fe_len % sbi->s_stripe))
......@@ -2656,40 +2667,6 @@ static void ext4_free_data_callback(struct super_block *sb,
mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
}
#ifdef CONFIG_EXT4_DEBUG
u8 mb_enable_debug __read_mostly;
static struct dentry *debugfs_dir;
static struct dentry *debugfs_debug;
static void __init ext4_create_debugfs_entry(void)
{
debugfs_dir = debugfs_create_dir("ext4", NULL);
if (debugfs_dir)
debugfs_debug = debugfs_create_u8("mballoc-debug",
S_IRUGO | S_IWUSR,
debugfs_dir,
&mb_enable_debug);
}
static void ext4_remove_debugfs_entry(void)
{
debugfs_remove(debugfs_debug);
debugfs_remove(debugfs_dir);
}
#else
static void __init ext4_create_debugfs_entry(void)
{
}
static void ext4_remove_debugfs_entry(void)
{
}
#endif
int __init ext4_init_mballoc(void)
{
ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
......@@ -2711,7 +2688,6 @@ int __init ext4_init_mballoc(void)
kmem_cache_destroy(ext4_ac_cachep);
return -ENOMEM;
}
ext4_create_debugfs_entry();
return 0;
}
......@@ -2726,7 +2702,6 @@ void ext4_exit_mballoc(void)
kmem_cache_destroy(ext4_ac_cachep);
kmem_cache_destroy(ext4_free_data_cachep);
ext4_groupinfo_destroy_slabs();
ext4_remove_debugfs_entry();
}
......@@ -3872,7 +3847,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
struct super_block *sb = ac->ac_sb;
ext4_group_t ngroups, i;
if (!mb_enable_debug ||
if (!ext4_mballoc_debug ||
(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
return;
......@@ -4005,8 +3980,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
len = ar->len;
/* just a dirty hack to filter too big requests */
if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
len = EXT4_CLUSTERS_PER_GROUP(sb);
/* start searching from the goal */
goal = ar->goal;
......@@ -4136,7 +4111,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
/* The max size of hash table is PREALLOC_TB_SIZE */
order = PREALLOC_TB_SIZE - 1;
/* Add the prealloc space to lg */
rcu_read_lock();
spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
pa_inode_list) {
spin_lock(&tmp_pa->pa_lock);
......@@ -4160,12 +4135,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
if (!added)
list_add_tail_rcu(&pa->pa_inode_list,
&lg->lg_prealloc_list[order]);
rcu_read_unlock();
spin_unlock(&lg->lg_prealloc_lock);
/* Now trim the list to be not more than 8 elements */
if (lg_prealloc_count > 8) {
ext4_mb_discard_lg_preallocations(sb, lg,
order, lg_prealloc_count);
order, lg_prealloc_count);
return;
}
return ;
......
......@@ -37,11 +37,11 @@
/*
*/
#ifdef CONFIG_EXT4_DEBUG
extern u8 mb_enable_debug;
extern ushort ext4_mballoc_debug;
#define mb_debug(n, fmt, a...) \
do { \
if ((n) <= mb_enable_debug) { \
if ((n) <= ext4_mballoc_debug) { \
printk(KERN_DEBUG "(%s, %d): %s: ", \
__FILE__, __LINE__, __func__); \
printk(fmt, ## a); \
......
......@@ -456,11 +456,14 @@ int ext4_ext_migrate(struct inode *inode)
*/
return retval;
handle = ext4_journal_start(inode,
EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
+ 1);
/*
* Worst case we can touch the allocation bitmaps, a bgd
* block, and a block to link in the orphan list. We do need
* need to worry about credits for modifying the quota inode.
*/
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE,
4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
return retval;
......@@ -507,7 +510,7 @@ int ext4_ext_migrate(struct inode *inode)
ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
up_read((&EXT4_I(inode)->i_data_sem));
handle = ext4_journal_start(inode, 1);
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
if (IS_ERR(handle)) {
/*
* It is impossible to update on-disk structures without
......
......@@ -80,6 +80,8 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
* is not blocked in the elevator. */
if (!*bh)
*bh = sb_getblk(sb, mmp_block);
if (!*bh)
return -ENOMEM;
if (*bh) {
get_bh(*bh);
lock_buffer(*bh);
......@@ -91,7 +93,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
*bh = NULL;
}
}
if (!*bh) {
if (unlikely(!*bh)) {
ext4_warning(sb, "Error while reading MMP block %llu",
mmp_block);
return -EIO;
......
......@@ -681,6 +681,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
depth = ext_depth(donor_inode);
dext = donor_path[depth].p_ext;
if (unlikely(!dext))
goto missing_donor_extent;
tmp_dext = *dext;
*err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
......@@ -691,7 +693,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
/* Loop for the donor extents */
while (1) {
/* The extent for donor must be found. */
if (!dext) {
if (unlikely(!dext)) {
missing_donor_extent:
EXT4_ERROR_INODE(donor_inode,
"The extent for donor must be found");
*err = -EIO;
......@@ -761,9 +764,6 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
kfree(donor_path);
}
ext4_ext_invalidate_cache(orig_inode);
ext4_ext_invalidate_cache(donor_inode);
return replaced_count;
}
......@@ -920,7 +920,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
again:
*err = 0;
jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
handle = ext4_journal_start(orig_inode, jblocks);
handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
if (IS_ERR(handle)) {
*err = PTR_ERR(handle);
return 0;
......
此差异已折叠。
......@@ -23,6 +23,7 @@
#include <linux/workqueue.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include "ext4_jbd2.h"
#include "xattr.h"
......@@ -73,8 +74,6 @@ void ext4_free_io_end(ext4_io_end_t *io)
BUG_ON(!list_empty(&io->list));
BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
if (io->page)
put_page(io->page);
for (i = 0; i < io->num_io_pages; i++)
put_io_page(io->pages[i]);
io->num_io_pages = 0;
......@@ -103,14 +102,13 @@ static int ext4_end_io(ext4_io_end_t *io)
"(inode %lu, offset %llu, size %zd, error %d)",
inode->i_ino, offset, size, ret);
}
if (io->iocb)
aio_complete(io->iocb, io->result, 0);
if (io->flag & EXT4_IO_END_DIRECT)
inode_dio_done(inode);
/* Wake up anyone waiting on unwritten extent conversion */
if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
wake_up_all(ext4_ioend_wq(inode));
if (io->flag & EXT4_IO_END_DIRECT)
inode_dio_done(inode);
if (io->iocb)
aio_complete(io->iocb, io->result, 0);
return ret;
}
......@@ -119,7 +117,6 @@ static void dump_completed_IO(struct inode *inode)
#ifdef EXT4FS_DEBUG
struct list_head *cur, *before, *after;
ext4_io_end_t *io, *io0, *io1;
unsigned long flags;
if (list_empty(&EXT4_I(inode)->i_completed_io_list)) {
ext4_debug("inode %lu completed_io list is empty\n",
......@@ -152,26 +149,20 @@ void ext4_add_complete_io(ext4_io_end_t *io_end)
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (list_empty(&ei->i_completed_io_list)) {
io_end->flag |= EXT4_IO_END_QUEUED;
queue_work(wq, &io_end->work);
}
if (list_empty(&ei->i_completed_io_list))
queue_work(wq, &ei->i_unwritten_work);
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
}
static int ext4_do_flush_completed_IO(struct inode *inode,
ext4_io_end_t *work_io)
static int ext4_do_flush_completed_IO(struct inode *inode)
{
ext4_io_end_t *io;
struct list_head unwritten, complete, to_free;
struct list_head unwritten;
unsigned long flags;
struct ext4_inode_info *ei = EXT4_I(inode);
int err, ret = 0;
INIT_LIST_HEAD(&complete);
INIT_LIST_HEAD(&to_free);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
dump_completed_IO(inode);
list_replace_init(&ei->i_completed_io_list, &unwritten);
......@@ -185,32 +176,7 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
err = ext4_end_io(io);
if (unlikely(!ret && err))
ret = err;
list_add_tail(&io->list, &complete);
}
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
while (!list_empty(&complete)) {
io = list_entry(complete.next, ext4_io_end_t, list);
io->flag &= ~EXT4_IO_END_UNWRITTEN;
/* end_io context can not be destroyed now because it still
* used by queued worker. Worker thread will destroy it later */
if (io->flag & EXT4_IO_END_QUEUED)
list_del_init(&io->list);
else
list_move(&io->list, &to_free);
}
/* If we are called from worker context, it is time to clear queued
* flag, and destroy it's end_io if it was converted already */
if (work_io) {
work_io->flag &= ~EXT4_IO_END_QUEUED;
if (!(work_io->flag & EXT4_IO_END_UNWRITTEN))
list_add_tail(&work_io->list, &to_free);
}
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
while (!list_empty(&to_free)) {
io = list_entry(to_free.next, ext4_io_end_t, list);
list_del_init(&io->list);
ext4_free_io_end(io);
}
return ret;
......@@ -219,10 +185,11 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
/*
* work on completed aio dio IO, to convert unwritten extents to extents
*/
static void ext4_end_io_work(struct work_struct *work)
void ext4_end_io_work(struct work_struct *work)
{
ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
ext4_do_flush_completed_IO(io->inode, io);
struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
i_unwritten_work);
ext4_do_flush_completed_IO(&ei->vfs_inode);
}
int ext4_flush_unwritten_io(struct inode *inode)
......@@ -230,7 +197,7 @@ int ext4_flush_unwritten_io(struct inode *inode)
int ret;
WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) &&
!(inode->i_state & I_FREEING));
ret = ext4_do_flush_completed_IO(inode, NULL);
ret = ext4_do_flush_completed_IO(inode);
ext4_unwritten_wait(inode);
return ret;
}
......@@ -241,7 +208,6 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
if (io) {
atomic_inc(&EXT4_I(inode)->i_ioend_count);
io->inode = inode;
INIT_WORK(&io->work, ext4_end_io_work);
INIT_LIST_HEAD(&io->list);
}
return io;
......@@ -382,14 +348,6 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
}
if (!buffer_mapped(bh) || buffer_delay(bh)) {
if (!buffer_mapped(bh))
clear_buffer_dirty(bh);
if (io->io_bio)
ext4_io_submit(io);
return 0;
}
if (io->io_bio && bh->b_blocknr != io->io_next_block) {
submit_and_retry:
ext4_io_submit(io);
......@@ -436,7 +394,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
if (!io_page) {
set_page_dirty(page);
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return -ENOMEM;
}
......@@ -468,7 +426,15 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
set_buffer_uptodate(bh);
continue;
}
clear_buffer_dirty(bh);
if (!buffer_dirty(bh) || buffer_delay(bh) ||
!buffer_mapped(bh) || buffer_unwritten(bh)) {
/* A hole? We can safely clear the dirty bit */
if (!buffer_mapped(bh))
clear_buffer_dirty(bh);
if (io->io_bio)
ext4_io_submit(io);
continue;
}
ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
if (ret) {
/*
......@@ -476,9 +442,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
* we can do but mark the page as dirty, and
* better luck next time.
*/
set_page_dirty(page);
redirty_page_for_writepage(wbc, page);
break;
}
clear_buffer_dirty(bh);
}
unlock_page(page);
/*
......
......@@ -333,8 +333,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
int err;
bh = sb_getblk(sb, blk);
if (!bh)
return ERR_PTR(-EIO);
if (unlikely(!bh))
return ERR_PTR(-ENOMEM);
if ((err = ext4_journal_get_write_access(handle, bh))) {
brelse(bh);
bh = ERR_PTR(err);
......@@ -410,8 +410,8 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
return err;
bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
if (!bh)
return -EIO;
if (unlikely(!bh))
return -ENOMEM;
err = ext4_journal_get_write_access(handle, bh);
if (err)
......@@ -466,7 +466,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
/* This transaction may be extended/restarted along the way */
handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
if (IS_ERR(handle))
return PTR_ERR(handle);
......@@ -500,8 +500,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
goto out;
gdb = sb_getblk(sb, block);
if (!gdb) {
err = -EIO;
if (unlikely(!gdb)) {
err = -ENOMEM;
goto out;
}
......@@ -1031,7 +1031,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
handle_t *handle;
int err = 0, err2;
handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
if (IS_ERR(handle)) {
group = 1;
err = PTR_ERR(handle);
......@@ -1064,8 +1064,8 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
ext4_bg_has_super(sb, group));
bh = sb_getblk(sb, backup_block);
if (!bh) {
err = -EIO;
if (unlikely(!bh)) {
err = -ENOMEM;
break;
}
ext4_debug("update metadata backup %llu(+%llu)\n",
......@@ -1168,7 +1168,7 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
{
struct buffer_head *bh = sb_getblk(sb, block);
if (!bh)
if (unlikely(!bh))
return NULL;
if (!bh_uptodate_or_lock(bh)) {
if (bh_submit_read(bh) < 0) {
......@@ -1412,7 +1412,7 @@ static int ext4_flex_group_add(struct super_block *sb,
* modify each of the reserved GDT dindirect blocks.
*/
credit = flex_gd->count * 4 + reserved_gdb;
handle = ext4_journal_start_sb(sb, credit);
handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto exit;
......@@ -1506,10 +1506,12 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
group_data[i].blocks_count = blocks_per_group;
overhead = ext4_group_overhead_blocks(sb, group + i);
group_data[i].free_blocks_count = blocks_per_group - overhead;
if (ext4_has_group_desc_csum(sb))
if (ext4_has_group_desc_csum(sb)) {
flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
EXT4_BG_INODE_UNINIT;
else
if (!test_opt(sb, INIT_INODE_TABLE))
flex_gd->bg_flags[i] |= EXT4_BG_INODE_ZEROED;
} else
flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
}
......@@ -1594,7 +1596,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
err = ext4_alloc_flex_bg_array(sb, input->group + 1);
if (err)
return err;
goto out;
err = ext4_mb_alloc_groupinfo(sb, input->group + 1);
if (err)
......@@ -1622,7 +1624,7 @@ static int ext4_group_extend_no_check(struct super_block *sb,
/* We will update the superblock, one block bitmap, and
* one group descriptor via ext4_group_add_blocks().
*/
handle = ext4_journal_start_sb(sb, 3);
handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, 3);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
ext4_warning(sb, "error %d on journal start", err);
......@@ -1786,7 +1788,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
credits += 3; /* block bitmap, bg descriptor, resize inode */
}
handle = ext4_journal_start_sb(sb, credits);
handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -435,7 +435,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
trace_jbd2_commit_locking(journal, commit_transaction);
stats.run.rs_wait = commit_transaction->t_max_wait;
stats.run.rs_request_delay = 0;
stats.run.rs_locked = jiffies;
if (commit_transaction->t_requested)
stats.run.rs_request_delay =
jbd2_time_diff(commit_transaction->t_requested,
stats.run.rs_locked);
stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
stats.run.rs_locked);
......@@ -1116,7 +1121,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
spin_lock(&journal->j_history_lock);
journal->j_stats.ts_tid++;
if (commit_transaction->t_requested)
journal->j_stats.ts_requested++;
journal->j_stats.run.rs_wait += stats.run.rs_wait;
journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
journal->j_stats.run.rs_running += stats.run.rs_running;
journal->j_stats.run.rs_locked += stats.run.rs_locked;
journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册