提交 81280572 编写于 作者: L Linus Torvalds

Merge branch 'upstream-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'upstream-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (50 commits)
  ext4,jbd2: convert tracepoints to use major/minor numbers
  ext4: optimize orphan_list handling for ext4_setattr
  ext4: fix unbalanced mutex unlock in error path of ext4_li_request_new
  ext4: fix compile error in ext4_fallocate()
  ext4: move ext4_mb_{get,put}_buddy_cache_lock and make them static
  ext4: rename mark_bitmap_end() to ext4_mark_bitmap_end()
  ext4: move flush_completed_IO to fs/ext4/fsync.c and make it static
  ext4: rename {ext,idx}_pblock and inline small extent functions
  ext4: make various ext4 functions be static
  ext4: rename {exit,init}_ext4_*() to ext4_{exit,init}_*()
  ext4: fix kernel oops if the journal superblock has a non-zero j_errno
  ext4: update writeback_index based on last page scanned
  ext4: implement writeback livelock avoidance using page tagging
  ext4: tidy up a void argument in inode.c
  ext4: add batched_discard into ext4 feature list
  ext4: Add batched discard support for ext4
  fs: Add FITRIM ioctl
  ext4: Use return value from sb_issue_discard()
  ext4: Check return value of sb_getblk() and friends
  ext4: use bio layer instead of buffer layer in mpage_da_submit_io
  ...
......@@ -353,6 +353,20 @@ noauto_da_alloc replacing existing files via patterns such as
system crashes before the delayed allocation
blocks are forced to disk.
noinit_itable Do not initialize any uninitialized inode table
blocks in the background. This feature may be
used by installation CD's so that the install
process can complete as quickly as possible; the
inode table initialization process would then be
deferred until the next time the file system
is unmounted.
init_itable=n The lazy itable init code will wait n times the
number of milliseconds it took to zero out the
previous block group's inode table. This
minimizes the impact on the systme performance
while file system's inode table is being initialized.
discard Controls whether ext4 should issue discard/TRIM
nodiscard(*) commands to the underlying block device when
blocks are freed. This is useful for SSD devices
......
......@@ -4,7 +4,7 @@
obj-$(CONFIG_EXT4_FS) += ext4.o
ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
......
......@@ -171,7 +171,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
* less than the blocksize * 8 ( which is the size
* of bitmap ), set rest of the block bitmap to 1
*/
mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
bh->b_data);
}
return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
}
......@@ -489,7 +490,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
* Check if filesystem has nblocks free & available for allocation.
* On success return 1, return 0 on failure.
*/
int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
{
s64 free_blocks, dirty_blocks, root_blocks;
struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
......
......@@ -29,16 +29,15 @@ struct ext4_system_zone {
static struct kmem_cache *ext4_system_zone_cachep;
int __init init_ext4_system_zone(void)
int __init ext4_init_system_zone(void)
{
ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
SLAB_RECLAIM_ACCOUNT);
ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
if (ext4_system_zone_cachep == NULL)
return -ENOMEM;
return 0;
}
void exit_ext4_system_zone(void)
void ext4_exit_system_zone(void)
{
kmem_cache_destroy(ext4_system_zone_cachep);
}
......
......@@ -39,7 +39,7 @@ static int ext4_release_dir(struct inode *inode,
struct file *filp);
const struct file_operations ext4_dir_operations = {
.llseek = generic_file_llseek,
.llseek = ext4_llseek,
.read = generic_read_dir,
.readdir = ext4_readdir, /* we take BKL. needed?*/
.unlocked_ioctl = ext4_ioctl,
......
......@@ -168,7 +168,20 @@ struct mpage_da_data {
int pages_written;
int retval;
};
#define EXT4_IO_UNWRITTEN 0x1
/*
* Flags for ext4_io_end->flags
*/
#define EXT4_IO_END_UNWRITTEN 0x0001
#define EXT4_IO_END_ERROR 0x0002
struct ext4_io_page {
struct page *p_page;
int p_count;
};
#define MAX_IO_PAGES 128
typedef struct ext4_io_end {
struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */
......@@ -179,8 +192,18 @@ typedef struct ext4_io_end {
struct work_struct work; /* data work queue */
struct kiocb *iocb; /* iocb struct for AIO */
int result; /* error value for AIO */
int num_io_pages;
struct ext4_io_page *pages[MAX_IO_PAGES];
} ext4_io_end_t;
struct ext4_io_submit {
int io_op;
struct bio *io_bio;
ext4_io_end_t *io_end;
struct ext4_io_page *io_page;
sector_t io_next_block;
};
/*
* Special inodes numbers
*/
......@@ -205,6 +228,7 @@ typedef struct ext4_io_end {
#define EXT4_MIN_BLOCK_SIZE 1024
#define EXT4_MAX_BLOCK_SIZE 65536
#define EXT4_MIN_BLOCK_LOG_SIZE 10
#define EXT4_MAX_BLOCK_LOG_SIZE 16
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
#else
......@@ -889,6 +913,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
......@@ -1087,7 +1112,6 @@ struct ext4_sb_info {
struct completion s_kobj_unregister;
/* Journaling */
struct inode *s_journal_inode;
struct journal_s *s_journal;
struct list_head s_orphan;
struct mutex s_orphan_lock;
......@@ -1120,10 +1144,7 @@ struct ext4_sb_info {
/* for buddy allocator */
struct ext4_group_info ***s_group_info;
struct inode *s_buddy_cache;
long s_blocks_reserved;
spinlock_t s_reserve_lock;
spinlock_t s_md_lock;
tid_t s_last_transaction;
unsigned short *s_mb_offsets;
unsigned int *s_mb_maxs;
......@@ -1141,7 +1162,6 @@ struct ext4_sb_info {
unsigned long s_mb_last_start;
/* stats for buddy allocator */
spinlock_t s_mb_pa_lock;
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
atomic_t s_bal_success; /* we found long enough chunks */
atomic_t s_bal_allocated; /* in blocks */
......@@ -1172,6 +1192,11 @@ struct ext4_sb_info {
/* timer for periodic error stats printing */
struct timer_list s_err_report;
/* Lazy inode table initialization info */
struct ext4_li_request *s_li_request;
/* Wait multiplier for lazy initialization thread */
unsigned int s_li_wait_mult;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
......@@ -1533,7 +1558,42 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
extern struct proc_dir_entry *ext4_proc_root;
/*
* Timeout and state flag for lazy initialization inode thread.
*/
#define EXT4_DEF_LI_WAIT_MULT 10
#define EXT4_DEF_LI_MAX_START_DELAY 5
#define EXT4_LAZYINIT_QUIT 0x0001
#define EXT4_LAZYINIT_RUNNING 0x0002
/*
* Lazy inode table initialization info
*/
struct ext4_lazy_init {
unsigned long li_state;
wait_queue_head_t li_wait_daemon;
wait_queue_head_t li_wait_task;
struct timer_list li_timer;
struct task_struct *li_task;
struct list_head li_request_list;
struct mutex li_list_mtx;
};
struct ext4_li_request {
struct super_block *lr_super;
struct ext4_sb_info *lr_sbi;
ext4_group_t lr_next_group;
struct list_head lr_request;
unsigned long lr_next_sched;
unsigned long lr_timeout;
};
struct ext4_features {
struct kobject f_kobj;
struct completion f_kobj_unregister;
};
/*
* Function prototypes
......@@ -1561,7 +1621,6 @@ extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp);
extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
......@@ -1605,11 +1664,9 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
extern unsigned long ext4_count_dirs(struct super_block *);
extern void ext4_check_inodes_bitmap(struct super_block *);
extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
struct buffer_head *bh,
ext4_group_t group,
struct ext4_group_desc *desc);
extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
extern int ext4_init_inode_table(struct super_block *sb,
ext4_group_t group, int barrier);
/* mballoc.c */
extern long ext4_mb_stats;
......@@ -1620,16 +1677,15 @@ extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *);
extern int __init init_ext4_mballoc(void);
extern void exit_ext4_mballoc(void);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block,
unsigned long count, int flags);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
ext4_group_t, int);
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
/* inode.c */
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
......@@ -1657,13 +1713,11 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int flush_completed_IO(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
/* ioctl.c */
......@@ -1960,6 +2014,7 @@ extern const struct file_operations ext4_dir_operations;
/* file.c */
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
/* namei.c */
extern const struct inode_operations ext4_dir_inode_operations;
......@@ -1973,8 +2028,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
/* block_validity */
extern void ext4_release_system_zone(struct super_block *sb);
extern int ext4_setup_system_zone(struct super_block *sb);
extern int __init init_ext4_system_zone(void);
extern void exit_ext4_system_zone(void);
extern int __init ext4_init_system_zone(void);
extern void ext4_exit_system_zone(void);
extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
ext4_fsblk_t start_blk,
unsigned int count);
......@@ -2002,6 +2057,17 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
__u64 start_orig, __u64 start_donor,
__u64 len, __u64 *moved_len);
/* page-io.c */
extern int __init ext4_init_pageio(void);
extern void ext4_exit_pageio(void);
extern void ext4_free_io_end(ext4_io_end_t *io);
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
extern int ext4_end_io_nolock(ext4_io_end_t *io);
extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
struct writeback_control *wbc);
/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
enum ext4_state_bits {
......
......@@ -225,11 +225,60 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
}
/*
* ext4_ext_pblock:
* combine low and high parts of physical block number into ext4_fsblk_t
*/
static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
{
ext4_fsblk_t block;
block = le32_to_cpu(ex->ee_start_lo);
block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
return block;
}
/*
* ext4_idx_pblock:
* combine low and high parts of a leaf physical block number into ext4_fsblk_t
*/
static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix)
{
ext4_fsblk_t block;
block = le32_to_cpu(ix->ei_leaf_lo);
block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
return block;
}
/*
* ext4_ext_store_pblock:
* stores a large physical block number into an extent struct,
* breaking it into parts
*/
static inline void ext4_ext_store_pblock(struct ext4_extent *ex,
ext4_fsblk_t pb)
{
ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
0xffff);
}
/*
* ext4_idx_store_pblock:
* stores a large physical block number into an index struct,
* breaking it into parts
*/
static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
ext4_fsblk_t pb)
{
ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
0xffff);
}
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
sector_t lblocks);
extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num,
......@@ -237,19 +286,9 @@ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
extern int ext4_can_extents_be_merged(struct inode *inode,
struct ext4_extent *ex1,
struct ext4_extent *ex2);
extern int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *);
extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int);
extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
ext_prepare_callback, void *);
extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path *);
extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
ext4_lblk_t *, ext4_fsblk_t *);
extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
ext4_lblk_t *, ext4_fsblk_t *);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
#endif /* _EXT4_EXTENTS */
......
此差异已折叠。
......@@ -130,8 +130,50 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
return dquot_file_open(inode, filp);
}
/*
* ext4_llseek() copied from generic_file_llseek() to handle both
* block-mapped and extent-mapped maxbytes values. This should
* otherwise be identical with generic_file_llseek().
*/
loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
{
struct inode *inode = file->f_mapping->host;
loff_t maxbytes;
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
else
maxbytes = inode->i_sb->s_maxbytes;
mutex_lock(&inode->i_mutex);
switch (origin) {
case SEEK_END:
offset += inode->i_size;
break;
case SEEK_CUR:
if (offset == 0) {
mutex_unlock(&inode->i_mutex);
return file->f_pos;
}
offset += file->f_pos;
break;
}
if (offset < 0 || offset > maxbytes) {
mutex_unlock(&inode->i_mutex);
return -EINVAL;
}
if (offset != file->f_pos) {
file->f_pos = offset;
file->f_version = 0;
}
mutex_unlock(&inode->i_mutex);
return offset;
}
const struct file_operations ext4_file_operations = {
.llseek = generic_file_llseek,
.llseek = ext4_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
......
......@@ -34,6 +34,89 @@
#include <trace/events/ext4.h>
static void dump_completed_IO(struct inode * inode)
{
#ifdef EXT4_DEBUG
struct list_head *cur, *before, *after;
ext4_io_end_t *io, *io0, *io1;
unsigned long flags;
if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
return;
}
ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
cur = &io->list;
before = cur->prev;
io0 = container_of(before, ext4_io_end_t, list);
after = cur->next;
io1 = container_of(after, ext4_io_end_t, list);
ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
io, inode->i_ino, io0, io1);
}
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
#endif
}
/*
* This function is called from ext4_sync_file().
*
* When IO is completed, the work to convert unwritten extents to
* written is queued on workqueue but may not get immediately
* scheduled. When fsync is called, we need to ensure the
* conversion is complete before fsync returns.
* The inode keeps track of a list of pending/completed IO that
* might needs to do the conversion. This function walks through
* the list and convert the related unwritten extents for completed IO
* to written.
* The function return the number of pending IOs on success.
*/
static int flush_completed_IO(struct inode *inode)
{
ext4_io_end_t *io;
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned long flags;
int ret = 0;
int ret2 = 0;
if (list_empty(&ei->i_completed_io_list))
return ret;
dump_completed_IO(inode);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
while (!list_empty(&ei->i_completed_io_list)){
io = list_entry(ei->i_completed_io_list.next,
ext4_io_end_t, list);
/*
* Calling ext4_end_io_nolock() to convert completed
* IO to written.
*
* When ext4_sync_file() is called, run_queue() may already
* about to flush the work corresponding to this io structure.
* It will be upset if it founds the io structure related
* to the work-to-be schedule is freed.
*
* Thus we need to keep the io structure still valid here after
* convertion finished. The io structure has a flag to
* avoid double converting from both fsync and background work
* queue work.
*/
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
ret = ext4_end_io_nolock(io);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (ret < 0)
ret2 = ret;
else
list_del_init(&io->list);
}
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
return (ret2 < 0) ? ret2 : 0;
}
/*
* If we're not journaling and this is a just-created file, we have to
* sync our parent directory (if it was freshly created) since
......
......@@ -50,7 +50,7 @@
* need to use it within a single byte (to ensure we get endianness right).
* We can use memset for the rest of the bitmap as there are no other users.
*/
void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
int i;
......@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
}
/* Initializes an uninitialized inode bitmap */
unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
static unsigned ext4_init_inode_bitmap(struct super_block *sb,
struct buffer_head *bh,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
......@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
}
memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
return EXT4_INODES_PER_GROUP(sb);
......@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
return NULL;
bitmap_blk = ext4_inode_bitmap(sb, desc);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
......@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
unlock_buffer(bh);
return bh;
}
ext4_lock_group(sb, block_group);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
ext4_init_inode_bitmap(sb, bh, block_group, desc);
......@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
return bh;
}
ext4_unlock_group(sb, block_group);
if (buffer_uptodate(bh)) {
/*
* if not uninit if bh is uptodate,
......@@ -411,8 +415,8 @@ struct orlov_stats {
* for a particular block group or flex_bg. If flex_size is 1, then g
* is a block group number; otherwise it is flex_bg number.
*/
void get_orlov_stats(struct super_block *sb, ext4_group_t g,
int flex_size, struct orlov_stats *stats)
static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
int flex_size, struct orlov_stats *stats)
{
struct ext4_group_desc *desc;
struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
......@@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb,
{
int free = 0, retval = 0, count;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
/*
* We have to be sure that new inode allocation does not race with
* inode table initialization, because otherwise we may end up
* allocating and writing new inode right before sb_issue_zeroout
* takes place and overwriting our new inode with zeroes. So we
* take alloc_sem to prevent it.
*/
down_read(&grp->alloc_sem);
ext4_lock_group(sb, group);
if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
/* not a free inode */
......@@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb,
if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
ino > EXT4_INODES_PER_GROUP(sb)) {
ext4_unlock_group(sb, group);
up_read(&grp->alloc_sem);
ext4_error(sb, "reserved inode or inode > inodes count - "
"block_group = %u, inode=%lu", group,
ino + group * EXT4_INODES_PER_GROUP(sb));
......@@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb,
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
err_ret:
ext4_unlock_group(sb, group);
up_read(&grp->alloc_sem);
return retval;
}
......@@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb)
}
return count;
}
/*
* Zeroes not yet zeroed inode table - just write zeroes through the whole
* inode table. Must be called without any spinlock held. The only place
* where it is called from on active part of filesystem is ext4lazyinit
* thread, so we do not need any special locks, however we have to prevent
* inode allocation from the current group, so we take alloc_sem lock, to
* block ext4_claim_inode until we are finished.
*/
extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
int barrier)
{
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_desc *gdp = NULL;
struct buffer_head *group_desc_bh;
handle_t *handle;
ext4_fsblk_t blk;
int num, ret = 0, used_blks = 0;
/* This should not happen, but just to be sure check this */
if (sb->s_flags & MS_RDONLY) {
ret = 1;
goto out;
}
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
if (!gdp)
goto out;
/*
* We do not need to lock this, because we are the only one
* handling this flag.
*/
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
goto out;
handle = ext4_journal_start_sb(sb, 1);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
}
down_write(&grp->alloc_sem);
/*
* If inode bitmap was already initialized there may be some
* used inodes so we need to skip blocks with used inodes in
* inode table.
*/
if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
ext4_itable_unused_count(sb, gdp)),
sbi->s_inodes_per_block);
if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
ext4_error(sb, "Something is wrong with group %u\n"
"Used itable blocks: %d"
"itable unused count: %u\n",
group, used_blks,
ext4_itable_unused_count(sb, gdp));
ret = 1;
goto out;
}
blk = ext4_inode_table(sb, gdp) + used_blks;
num = sbi->s_itb_per_group - used_blks;
BUFFER_TRACE(group_desc_bh, "get_write_access");
ret = ext4_journal_get_write_access(handle,
group_desc_bh);
if (ret)
goto err_out;
/*
* Skip zeroout if the inode table is full. But we set the ZEROED
* flag anyway, because obviously, when it is full it does not need
* further zeroing.
*/
if (unlikely(num == 0))
goto skip_zeroout;
ext4_debug("going to zero out inode table in group %d\n",
group);
ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
if (ret < 0)
goto err_out;
if (barrier)
blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
skip_zeroout:
ext4_lock_group(sb, group);
gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
ext4_unlock_group(sb, group);
BUFFER_TRACE(group_desc_bh,
"call ext4_handle_dirty_metadata");
ret = ext4_handle_dirty_metadata(handle, NULL,
group_desc_bh);
err_out:
up_write(&grp->alloc_sem);
ext4_journal_stop(handle);
out:
return ret;
}
此差异已折叠。
此差异已折叠。
......@@ -412,7 +412,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
struct buffer_head *bh;
struct ext4_extent_header *eh;
block = idx_pblock(ix);
block = ext4_idx_pblock(ix);
bh = sb_bread(inode->i_sb, block);
if (!bh)
return -EIO;
......
......@@ -85,7 +85,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
/* leaf block */
*extent = ++path[ppos].p_ext;
path[ppos].p_block = ext_pblock(path[ppos].p_ext);
path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
return 0;
}
......@@ -96,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
/* index block */
path[ppos].p_idx++;
path[ppos].p_block = idx_pblock(path[ppos].p_idx);
path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
if (path[ppos+1].p_bh)
brelse(path[ppos+1].p_bh);
path[ppos+1].p_bh =
......@@ -111,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
path[cur_ppos].p_idx =
EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
path[cur_ppos].p_block =
idx_pblock(path[cur_ppos].p_idx);
ext4_idx_pblock(path[cur_ppos].p_idx);
if (path[cur_ppos+1].p_bh)
brelse(path[cur_ppos+1].p_bh);
path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
......@@ -133,7 +133,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
path[leaf_ppos].p_ext = *extent =
EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
path[leaf_ppos].p_block =
ext_pblock(path[leaf_ppos].p_ext);
ext4_ext_pblock(path[leaf_ppos].p_ext);
return 0;
}
}
......@@ -249,7 +249,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
*/
o_end->ee_block = end_ext->ee_block;
o_end->ee_len = end_ext->ee_len;
ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
}
o_start->ee_len = start_ext->ee_len;
......@@ -276,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
*/
o_end->ee_block = end_ext->ee_block;
o_end->ee_len = end_ext->ee_len;
ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
/*
* Set 0 to the extent block if new_ext was
......@@ -361,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start,
/* Insert new entry */
if (new_ext->ee_len) {
o_start[i] = *new_ext;
ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
}
/* Insert end entry */
......@@ -488,7 +488,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
start_ext.ee_len = end_ext.ee_len = 0;
new_ext.ee_block = cpu_to_le32(*from);
ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
new_ext.ee_len = dext->ee_len;
new_ext_alen = ext4_ext_get_actual_len(&new_ext);
new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
......@@ -553,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
copy_extent_status(oext, &end_ext);
end_ext_alen = ext4_ext_get_actual_len(&end_ext);
ext4_ext_store_pblock(&end_ext,
(ext_pblock(o_end) + oext_alen - end_ext_alen));
(ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
end_ext.ee_block =
cpu_to_le32(le32_to_cpu(o_end->ee_block) +
oext_alen - end_ext_alen);
......@@ -604,7 +604,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
/* When tmp_dext is too large, pick up the target range. */
diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
tmp_dext->ee_block =
cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
......@@ -613,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
tmp_dext->ee_len = cpu_to_le16(max_count);
orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
/* Adjust extent length if donor extent is larger than orig */
if (ext4_ext_get_actual_len(tmp_dext) >
......
......@@ -856,6 +856,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
struct buffer_head *bh_use[NAMEI_RA_SIZE];
struct buffer_head *bh, *ret = NULL;
ext4_lblk_t start, block, b;
const u8 *name = d_name->name;
int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
int ra_ptr = 0; /* Current index into readahead
......@@ -870,6 +871,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
namelen = d_name->len;
if (namelen > EXT4_NAME_LEN)
return NULL;
if ((namelen <= 2) && (name[0] == '.') &&
(name[1] == '.' || name[1] == '0')) {
/*
* "." or ".." will only be in the first block
* NFS may look up ".."; "." should be handled by the VFS
*/
block = start = 0;
nblocks = 1;
goto restart;
}
if (is_dx(dir)) {
bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
/*
......@@ -960,55 +971,35 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir, int *err)
{
struct super_block * sb;
struct super_block * sb = dir->i_sb;
struct dx_hash_info hinfo;
u32 hash;
struct dx_frame frames[2], *frame;
struct ext4_dir_entry_2 *de, *top;
struct buffer_head *bh;
ext4_lblk_t block;
int retval;
int namelen = d_name->len;
const u8 *name = d_name->name;
sb = dir->i_sb;
/* NFS may look up ".." - look at dx_root directory block */
if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
return NULL;
} else {
frame = frames;
frame->bh = NULL; /* for dx_release() */
frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
dx_set_block(frame->at, 0); /* dx_root block is 0 */
}
hash = hinfo.hash;
if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
return NULL;
do {
block = dx_get_block(frame->at);
if (!(bh = ext4_bread (NULL,dir, block, 0, err)))
if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
goto errout;
de = (struct ext4_dir_entry_2 *) bh->b_data;
top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
EXT4_DIR_REC_LEN(0));
for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
+ ((char *) de - bh->b_data);
if (!ext4_check_dir_entry(dir, de, bh, off)) {
brelse(bh);
*err = ERR_BAD_DX_DIR;
goto errout;
}
if (ext4_match(namelen, name, de)) {
*res_dir = de;
dx_release(frames);
return bh;
}
retval = search_dirblock(bh, dir, d_name,
block << EXT4_BLOCK_SIZE_BITS(sb),
res_dir);
if (retval == 1) { /* Success! */
dx_release(frames);
return bh;
}
brelse(bh);
if (retval == -1) {
*err = ERR_BAD_DX_DIR;
goto errout;
}
/* Check to see if we should continue to search */
retval = ext4_htree_next_block(dir, hash, frame,
retval = ext4_htree_next_block(dir, hinfo.hash, frame,
frames, NULL);
if (retval < 0) {
ext4_warning(sb,
......
/*
* linux/fs/ext4/page-io.c
*
* This contains the new page_io functions for ext4
*
* Written by Theodore Ts'o, 2010.
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "ext4_extents.h"
static struct kmem_cache *io_page_cachep, *io_end_cachep;
int __init ext4_init_pageio(void)
{
io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
if (io_page_cachep == NULL)
return -ENOMEM;
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
if (io_page_cachep == NULL) {
kmem_cache_destroy(io_page_cachep);
return -ENOMEM;
}
return 0;
}
void ext4_exit_pageio(void)
{
kmem_cache_destroy(io_end_cachep);
kmem_cache_destroy(io_page_cachep);
}
void ext4_free_io_end(ext4_io_end_t *io)
{
int i;
BUG_ON(!io);
if (io->page)
put_page(io->page);
for (i = 0; i < io->num_io_pages; i++) {
if (--io->pages[i]->p_count == 0) {
struct page *page = io->pages[i]->p_page;
end_page_writeback(page);
put_page(page);
kmem_cache_free(io_page_cachep, io->pages[i]);
}
}
io->num_io_pages = 0;
iput(io->inode);
kmem_cache_free(io_end_cachep, io);
}
/*
* check a range of space and convert unwritten extents to written.
*/
int ext4_end_io_nolock(ext4_io_end_t *io)
{
struct inode *inode = io->inode;
loff_t offset = io->offset;
ssize_t size = io->size;
int ret = 0;
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
"list->prev 0x%p\n",
io, inode->i_ino, io->list.next, io->list.prev);
if (list_empty(&io->list))
return ret;
if (!(io->flag & EXT4_IO_END_UNWRITTEN))
return ret;
ret = ext4_convert_unwritten_extents(inode, offset, size);
if (ret < 0) {
printk(KERN_EMERG "%s: failed to convert unwritten "
"extents to written extents, error is %d "
"io is still on inode %lu aio dio list\n",
__func__, ret, inode->i_ino);
return ret;
}
if (io->iocb)
aio_complete(io->iocb, io->result, 0);
/* clear the DIO AIO unwritten flag */
io->flag &= ~EXT4_IO_END_UNWRITTEN;
return ret;
}
/*
* work on completed aio dio IO, to convert unwritten extents to extents
*/
static void ext4_end_io_work(struct work_struct *work)
{
ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
struct inode *inode = io->inode;
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned long flags;
int ret;
mutex_lock(&inode->i_mutex);
ret = ext4_end_io_nolock(io);
if (ret < 0) {
mutex_unlock(&inode->i_mutex);
return;
}
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (!list_empty(&io->list))
list_del_init(&io->list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
mutex_unlock(&inode->i_mutex);
ext4_free_io_end(io);
}
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
{
ext4_io_end_t *io = NULL;
io = kmem_cache_alloc(io_end_cachep, flags);
if (io) {
memset(io, 0, sizeof(*io));
io->inode = igrab(inode);
BUG_ON(!io->inode);
INIT_WORK(&io->work, ext4_end_io_work);
INIT_LIST_HEAD(&io->list);
}
return io;
}
/*
* Print an buffer I/O error compatible with the fs/buffer.c. This
* provides compatibility with dmesg scrapers that look for a specific
* buffer I/O error message. We really need a unified error reporting
* structure to userspace ala Digital Unix's uerf system, but it's
* probably not going to happen in my lifetime, due to LKML politics...
*/
static void buffer_io_error(struct buffer_head *bh)
{
char b[BDEVNAME_SIZE];
printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
bdevname(bh->b_bdev, b),
(unsigned long long)bh->b_blocknr);
}
static void ext4_end_bio(struct bio *bio, int error)
{
ext4_io_end_t *io_end = bio->bi_private;
struct workqueue_struct *wq;
struct inode *inode;
unsigned long flags;
ext4_fsblk_t err_block;
int i;
BUG_ON(!io_end);
inode = io_end->inode;
bio->bi_private = NULL;
bio->bi_end_io = NULL;
if (test_bit(BIO_UPTODATE, &bio->bi_flags))
error = 0;
err_block = bio->bi_sector >> (inode->i_blkbits - 9);
bio_put(bio);
if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
pr_err("sb umounted, discard end_io request for inode %lu\n",
io_end->inode->i_ino);
ext4_free_io_end(io_end);
return;
}
if (error) {
io_end->flag |= EXT4_IO_END_ERROR;
ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
"(offset %llu size %ld starting block %llu)",
inode->i_ino,
(unsigned long long) io_end->offset,
(long) io_end->size,
(unsigned long long) err_block);
}
for (i = 0; i < io_end->num_io_pages; i++) {
struct page *page = io_end->pages[i]->p_page;
struct buffer_head *bh, *head;
int partial_write = 0;
head = page_buffers(page);
if (error)
SetPageError(page);
BUG_ON(!head);
if (head->b_size == PAGE_CACHE_SIZE)
clear_buffer_dirty(head);
else {
loff_t offset;
loff_t io_end_offset = io_end->offset + io_end->size;
offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
bh = head;
do {
if ((offset >= io_end->offset) &&
(offset+bh->b_size <= io_end_offset)) {
if (error)
buffer_io_error(bh);
clear_buffer_dirty(bh);
}
if (buffer_delay(bh))
partial_write = 1;
else if (!buffer_mapped(bh))
clear_buffer_dirty(bh);
else if (buffer_dirty(bh))
partial_write = 1;
offset += bh->b_size;
bh = bh->b_this_page;
} while (bh != head);
}
if (--io_end->pages[i]->p_count == 0) {
struct page *page = io_end->pages[i]->p_page;
end_page_writeback(page);
put_page(page);
kmem_cache_free(io_page_cachep, io_end->pages[i]);
}
/*
* If this is a partial write which happened to make
* all buffers uptodate then we can optimize away a
* bogus readpage() for the next read(). Here we
* 'discover' whether the page went uptodate as a
* result of this (potentially partial) write.
*/
if (!partial_write)
SetPageUptodate(page);
}
io_end->num_io_pages = 0;
/* Add the io_end to per-inode completed io list*/
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
/* queue the work to convert unwritten extents to written */
queue_work(wq, &io_end->work);
}
void ext4_io_submit(struct ext4_io_submit *io)
{
struct bio *bio = io->io_bio;
if (bio) {
bio_get(io->io_bio);
submit_bio(io->io_op, io->io_bio);
BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
bio_put(io->io_bio);
}
io->io_bio = 0;
io->io_op = 0;
io->io_end = 0;
}
static int io_submit_init(struct ext4_io_submit *io,
struct inode *inode,
struct writeback_control *wbc,
struct buffer_head *bh)
{
ext4_io_end_t *io_end;
struct page *page = bh->b_page;
int nvecs = bio_get_nr_vecs(bh->b_bdev);
struct bio *bio;
io_end = ext4_init_io_end(inode, GFP_NOFS);
if (!io_end)
return -ENOMEM;
do {
bio = bio_alloc(GFP_NOIO, nvecs);
nvecs >>= 1;
} while (bio == NULL);
bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
bio->bi_private = io->io_end = io_end;
bio->bi_end_io = ext4_end_bio;
io_end->inode = inode;
io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
io->io_bio = bio;
io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
WRITE_SYNC_PLUG : WRITE);
io->io_next_block = bh->b_blocknr;
return 0;
}
static int io_submit_add_bh(struct ext4_io_submit *io,
struct ext4_io_page *io_page,
struct inode *inode,
struct writeback_control *wbc,
struct buffer_head *bh)
{
ext4_io_end_t *io_end;
int ret;
if (buffer_new(bh)) {
clear_buffer_new(bh);
unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
}
if (!buffer_mapped(bh) || buffer_delay(bh)) {
if (!buffer_mapped(bh))
clear_buffer_dirty(bh);
if (io->io_bio)
ext4_io_submit(io);
return 0;
}
if (io->io_bio && bh->b_blocknr != io->io_next_block) {
submit_and_retry:
ext4_io_submit(io);
}
if (io->io_bio == NULL) {
ret = io_submit_init(io, inode, wbc, bh);
if (ret)
return ret;
}
io_end = io->io_end;
if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
(io_end->pages[io_end->num_io_pages-1] != io_page))
goto submit_and_retry;
if (buffer_uninit(bh))
io->io_end->flag |= EXT4_IO_END_UNWRITTEN;
io->io_end->size += bh->b_size;
io->io_next_block++;
ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
if ((io_end->num_io_pages == 0) ||
(io_end->pages[io_end->num_io_pages-1] != io_page)) {
io_end->pages[io_end->num_io_pages++] = io_page;
io_page->p_count++;
}
return 0;
}
int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
unsigned block_start, block_end, blocksize;
struct ext4_io_page *io_page;
struct buffer_head *bh, *head;
int ret = 0;
blocksize = 1 << inode->i_blkbits;
BUG_ON(PageWriteback(page));
set_page_writeback(page);
ClearPageError(page);
io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
if (!io_page) {
set_page_dirty(page);
unlock_page(page);
return -ENOMEM;
}
io_page->p_page = page;
io_page->p_count = 0;
get_page(page);
for (bh = head = page_buffers(page), block_start = 0;
bh != head || !block_start;
block_start = block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize;
if (block_start >= len) {
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
continue;
}
ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
* we can do but mark the page as dirty, and
* better luck next time.
*/
set_page_dirty(page);
break;
}
}
unlock_page(page);
/*
* If the page was truncated before we could do the writeback,
* or we had a memory allocation error while trying to write
* the first buffer head, we won't have submitted any pages for
* I/O. In that case we need to make sure we've cleared the
* PageWriteback bit from the page to prevent the system from
* wedging later on.
*/
if (io_page->p_count == 0) {
put_page(page);
end_page_writeback(page);
kmem_cache_free(io_page_cachep, io_page);
}
return ret;
}
......@@ -226,23 +226,13 @@ static int setup_new_group_blocks(struct super_block *sb,
}
/* Zero out all of the reserved backup group descriptor table blocks */
for (i = 0, bit = gdblocks + 1, block = start + bit;
i < reserved_gdb; i++, block++, bit++) {
struct buffer_head *gdb;
ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit);
if ((err = extend_or_restart_transaction(handle, 1, bh)))
goto exit_bh;
ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
block, sbi->s_itb_per_group);
err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
GFP_NOFS);
if (err)
goto exit_bh;
if (IS_ERR(gdb = bclean(handle, sb, block))) {
err = PTR_ERR(gdb);
goto exit_bh;
}
ext4_handle_dirty_metadata(handle, NULL, gdb);
ext4_set_bit(bit, bh->b_data);
brelse(gdb);
}
ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
input->block_bitmap - start);
ext4_set_bit(input->block_bitmap - start, bh->b_data);
......@@ -251,28 +241,18 @@ static int setup_new_group_blocks(struct super_block *sb,
ext4_set_bit(input->inode_bitmap - start, bh->b_data);
/* Zero out all of the inode table blocks */
for (i = 0, block = input->inode_table, bit = block - start;
i < sbi->s_itb_per_group; i++, bit++, block++) {
struct buffer_head *it;
ext4_debug("clear inode block %#04llx (+%d)\n", block, bit);
if ((err = extend_or_restart_transaction(handle, 1, bh)))
goto exit_bh;
if (IS_ERR(it = bclean(handle, sb, block))) {
err = PTR_ERR(it);
goto exit_bh;
}
ext4_handle_dirty_metadata(handle, NULL, it);
brelse(it);
ext4_set_bit(bit, bh->b_data);
}
block = input->inode_table;
ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
block, sbi->s_itb_per_group);
err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
if (err)
goto exit_bh;
if ((err = extend_or_restart_transaction(handle, 2, bh)))
goto exit_bh;
mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
bh->b_data);
ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
/* Mark unused entries in inode bitmap used */
......@@ -283,8 +263,8 @@ static int setup_new_group_blocks(struct super_block *sb,
goto exit_journal;
}
mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
ext4_handle_dirty_metadata(handle, NULL, bh);
exit_bh:
brelse(bh);
......
此差异已折叠。
......@@ -1588,7 +1588,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
#undef BLOCK_HASH_SHIFT
int __init
init_ext4_xattr(void)
ext4_init_xattr(void)
{
ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
if (!ext4_xattr_cache)
......@@ -1597,7 +1597,7 @@ init_ext4_xattr(void)
}
void
exit_ext4_xattr(void)
ext4_exit_xattr(void)
{
if (ext4_xattr_cache)
mb_cache_destroy(ext4_xattr_cache);
......
......@@ -83,8 +83,8 @@ extern void ext4_xattr_put_super(struct super_block *);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
extern int init_ext4_xattr(void);
extern void exit_ext4_xattr(void);
extern int __init ext4_init_xattr(void);
extern void ext4_exit_xattr(void);
extern const struct xattr_handler *ext4_xattr_handlers[];
......@@ -121,14 +121,14 @@ ext4_xattr_put_super(struct super_block *sb)
{
}
static inline int
static __init inline int
init_ext4_xattr(void)
{
return 0;
}
static inline void
exit_ext4_xattr(void)
ext4_exit_xattr(void)
{
}
......
......@@ -530,6 +530,41 @@ static int ioctl_fsthaw(struct file *filp)
return thaw_super(sb);
}
static int ioctl_fstrim(struct file *filp, void __user *argp)
{
struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
struct fstrim_range range;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
/* If filesystem doesn't support trim feature, return. */
if (sb->s_op->trim_fs == NULL)
return -EOPNOTSUPP;
/* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
if (sb->s_bdev == NULL)
return -EINVAL;
if (argp == NULL) {
range.start = 0;
range.len = ULLONG_MAX;
range.minlen = 0;
} else if (copy_from_user(&range, argp, sizeof(range)))
return -EFAULT;
ret = sb->s_op->trim_fs(sb, &range);
if (ret < 0)
return ret;
if ((argp != NULL) &&
(copy_to_user(argp, &range, sizeof(range))))
return -EFAULT;
return 0;
}
/*
* When you add any new common ioctls to the switches above and below
* please update compat_sys_ioctl() too.
......@@ -580,6 +615,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
error = ioctl_fsthaw(filp);
break;
case FITRIM:
error = ioctl_fstrim(filp, argp);
break;
case FS_IOC_FIEMAP:
return ioctl_fiemap(filp, arg);
......
......@@ -299,6 +299,16 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
transaction->t_chp_stats.cs_forced_to_close++;
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
if (unlikely(journal->j_flags & JBD2_UNMOUNT))
/*
* The journal thread is dead; so starting and
* waiting for a commit to finish will cause
* us to wait for a _very_ long time.
*/
printk(KERN_ERR "JBD2: %s: "
"Waiting for Godot: block %llu\n",
journal->j_devname,
(unsigned long long) bh->b_blocknr);
jbd2_log_start_commit(journal, tid);
jbd2_log_wait_commit(journal, tid);
ret = 1;
......
......@@ -26,7 +26,9 @@
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bitops.h>
#include <trace/events/jbd2.h>
#include <asm/system.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
......@@ -201,7 +203,7 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
mapping = jinode->i_vfs_inode->i_mapping;
jinode->i_flags |= JI_COMMIT_RUNNING;
set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
spin_unlock(&journal->j_list_lock);
/*
* submit the inode data buffers. We use writepage
......@@ -216,7 +218,8 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
commit_transaction->t_flushed_data_blocks = 1;
jinode->i_flags &= ~JI_COMMIT_RUNNING;
clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
smp_mb__after_clear_bit();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
spin_unlock(&journal->j_list_lock);
......@@ -237,7 +240,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
jinode->i_flags |= JI_COMMIT_RUNNING;
set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
spin_unlock(&journal->j_list_lock);
err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
if (err) {
......@@ -253,7 +256,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
ret = err;
}
spin_lock(&journal->j_list_lock);
jinode->i_flags &= ~JI_COMMIT_RUNNING;
clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
smp_mb__after_clear_bit();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
......
......@@ -42,12 +42,14 @@
#include <linux/log2.h>
#include <linux/vmalloc.h>
#include <linux/backing-dev.h>
#include <linux/bitops.h>
#define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/system.h>
EXPORT_SYMBOL(jbd2_journal_extend);
EXPORT_SYMBOL(jbd2_journal_stop);
......@@ -2210,7 +2212,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
restart:
spin_lock(&journal->j_list_lock);
/* Is commit writing out inode - we have to wait */
if (jinode->i_flags & JI_COMMIT_RUNNING) {
if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
......
......@@ -156,6 +156,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
*/
repeat:
read_lock(&journal->j_state_lock);
BUG_ON(journal->j_flags & JBD2_UNMOUNT);
if (is_journal_aborted(journal) ||
(journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
read_unlock(&journal->j_state_lock);
......
......@@ -891,6 +891,14 @@ static inline int sb_issue_discard(struct super_block *sb, sector_t block,
nr_blocks << (sb->s_blocksize_bits - 9),
gfp_mask, flags);
}
static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
sector_t nr_blocks, gfp_t gfp_mask)
{
return blkdev_issue_zeroout(sb->s_bdev,
block << (sb->s_blocksize_bits - 9),
nr_blocks << (sb->s_blocksize_bits - 9),
gfp_mask);
}
extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
......
......@@ -32,6 +32,12 @@
#define SEEK_END 2 /* seek relative to end of file */
#define SEEK_MAX SEEK_END
struct fstrim_range {
uint64_t start;
uint64_t len;
uint64_t minlen;
};
/* And dynamically-tunable limits and defaults: */
struct files_stat_struct {
unsigned long nr_files; /* read only */
......@@ -317,6 +323,7 @@ struct inodes_stat_t {
#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
#define FITHAW _IOWR('X', 120, int) /* Thaw */
#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */
#define FS_IOC_GETFLAGS _IOR('f', 1, long)
#define FS_IOC_SETFLAGS _IOW('f', 2, long)
......@@ -1604,6 +1611,7 @@ struct super_operations {
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
#endif
int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
int (*trim_fs) (struct super_block *, struct fstrim_range *);
};
/*
......
......@@ -395,7 +395,7 @@ struct jbd2_inode {
struct inode *i_vfs_inode;
/* Flags of inode [j_list_lock] */
unsigned int i_flags;
unsigned long i_flags;
};
struct jbd2_revoke_table_s;
......
......@@ -78,6 +78,11 @@ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
return 1;
}
static inline int percpu_counter_initialized(struct percpu_counter *fbc)
{
return (fbc->counters != NULL);
}
#else
struct percpu_counter {
......@@ -143,6 +148,11 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
return percpu_counter_read(fbc);
}
static inline int percpu_counter_initialized(struct percpu_counter *fbc)
{
return 1;
}
#endif /* CONFIG_SMP */
static inline void percpu_counter_inc(struct percpu_counter *fbc)
......
......@@ -141,6 +141,8 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
int generic_writepages(struct address_space *mapping,
struct writeback_control *wbc);
void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end);
int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
void *data);
......
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册