Commit 8cd226ca authored by Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (50 commits)
  jbd2: sparse pointer use of zero as null
  jbd2: Use round-jiffies() function for the "5 second" ext4/jbd2 wakeup
  jbd2: Mark jbd2 slabs as SLAB_TEMPORARY
  jbd2: add lockdep support
  ext4: Use the ext4_ext_actual_len() helper function
  ext4: fix uninitialized extent splitting error
  ext4: Check for return value from sb_set_blocksize
  ext4: Add stripe= option to /proc/mounts
  ext4: Enable the multiblock allocator by default
  ext4: Add multi block allocator for ext4
  ext4: Add new functions for searching extent tree
  ext4: Add ext4_find_next_bit()
  ext4: fix up EXT4FS_DEBUG builds
  ext4: Fix ext4_show_options to show the correct mount options.
  ext4: Add EXT4_IOC_MIGRATE ioctl
  ext4: Add inode version support in ext4
  vfs: Add 64 bit i_version support
  ext4: Add the journal checksum feature
  jbd2: jbd2 stats through procfs
  ext4: Take read lock during overwrite case.
  ...
......@@ -86,9 +86,21 @@ Alex is working on a new set of patches right now.
When mounting an ext4 filesystem, the following options are accepted:
(*) == default
extents ext4 will use extents to address file data. The
extents (*) ext4 will use extents to address file data. The
file system will no longer be mountable by ext3.
noextents ext4 will not use extents for newly created files
journal_checksum Enable checksumming of the journal transactions.
This will allow the recovery code in e2fsck and the
kernel to detect corruption in the journal. It is a
compatible change and will be ignored by older kernels.
journal_async_commit Commit block can be written to disk without waiting
for descriptor blocks. If enabled, older kernels cannot
mount the device. This will enable 'journal_checksum'
internally.
journal=update Update the ext4 file system's journal to the current
format.
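As a quick illustration (editorial note, not part of the patch): a filesystem could be mounted with journal checksumming via "mount -o journal_checksum /dev/sdXN /mnt"; using journal_async_commit instead enables checksumming implicitly, at the cost of making the filesystem unmountable on older kernels.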
......@@ -196,6 +208,12 @@ nobh (a) cache disk block mapping information
"nobh" option tries to avoid associating buffer
heads (supported only for "writeback" mode).
mballoc (*) Use the multiple block allocator for block allocation
nomballoc Disable the multiple block allocator for block allocation.
stripe=n Number of filesystem blocks that mballoc will try
to use for allocation size and alignment. For RAID5/6
systems this should be the number of data
disks * RAID chunk size in file system blocks.
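For example (illustrative numbers, not from the patch): a RAID5 array with 4 data disks and a 64 KiB chunk size on a 4 KiB-block filesystem would use stripe = 4 * (64 KiB / 4 KiB) = 64.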
Data Mode
---------
......
......@@ -857,6 +857,45 @@ CPUs.
The "procs_blocked" line gives the number of processes currently blocked,
waiting for I/O to complete.
1.9 Ext4 file system parameters
------------------------------
The ext4 file system has one directory per partition under /proc/fs/ext4/
# ls /proc/fs/ext4/hdc/
group_prealloc max_to_scan mb_groups mb_history min_to_scan order2_req
stats stream_req
mb_groups:
This file gives the details of the multiblock allocator's buddy cache of free blocks
mb_history:
Multiblock allocation history.
stats:
This file indicates whether the multiblock allocator should start collecting
statistics. The statistics are shown during unmount.
group_prealloc:
The multiblock allocator normalizes the block allocation request to
group_prealloc filesystem blocks if we don't have a stripe value set.
The stripe value can be specified at mount time or during mke2fs.
max_to_scan:
How long the multiblock allocator can look for the best extent (in found extents)
min_to_scan:
How long the multiblock allocator must look for the best extent
order2_req:
The multiblock allocator uses a 2^N buddy-based search only for requests greater
than or equal to order2_req. The request size is specified in filesystem
blocks. A value of 2 indicates that the buddy search is used only for requests
of 4 or more blocks.
stream_req:
Files smaller than stream_req are served by the stream allocator, whose
purpose is to pack requests as close to each other as possible to
produce smooth I/O traffic. A value of 16 indicates that files smaller than 16
filesystem blocks will use group-based preallocation.
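These files are plain procfs tunables, so they can be inspected and adjusted with ordinary reads and writes, e.g. "cat /proc/fs/ext4/hdc/mb_history" to dump the allocation history, or "echo 1 > /proc/fs/ext4/hdc/stats" to start collecting statistics (the device name hdc is just the example used above).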
------------------------------------------------------------------------------
Summary
......
......@@ -236,6 +236,7 @@ config JBD_DEBUG
config JBD2
tristate
select CRC32
help
This is a generic journaling layer for block devices that support
both 32-bit and 64-bit block numbers. It is currently used by
......
......@@ -546,11 +546,11 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
dentry->d_op = &afs_fs_dentry_operations;
d_add(dentry, inode);
_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%lu }",
_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }",
fid.vnode,
fid.unique,
dentry->d_inode->i_ino,
dentry->d_inode->i_version);
(unsigned long long)dentry->d_inode->i_version);
return NULL;
}
......@@ -630,9 +630,10 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
* been deleted and replaced, and the original vnode ID has
* been reused */
if (fid.unique != vnode->fid.unique) {
_debug("%s: file deleted (uq %u -> %u I:%lu)",
_debug("%s: file deleted (uq %u -> %u I:%llu)",
dentry->d_name.name, fid.unique,
vnode->fid.unique, dentry->d_inode->i_version);
vnode->fid.unique,
(unsigned long long)dentry->d_inode->i_version);
spin_lock(&vnode->lock);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
spin_unlock(&vnode->lock);
......
......@@ -301,7 +301,8 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
inode = dentry->d_inode;
_enter("{ ino=%lu v=%lu }", inode->i_ino, inode->i_version);
_enter("{ ino=%lu v=%llu }", inode->i_ino,
(unsigned long long)inode->i_version);
generic_fillattr(inode, stat);
return 0;
......
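The repeated pattern in these AFS hunks exists because i_version is now a u64 (see the "vfs: Add 64 bit i_version support" commit in this merge), and u64 is unsigned long on some 64-bit architectures but unsigned long long on others; the explicit cast keeps a single %llu format string portable. A minimal restatement of the idiom:

	/* i_version is u64; cast so %llu is correct on every architecture */
	printk("i_version=%llu\n", (unsigned long long)inode->i_version);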
......@@ -3213,6 +3213,50 @@ static int buffer_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
/**
* bh_uptodate_or_lock: Test whether the buffer is uptodate
* @bh: struct buffer_head
*
* Return true if the buffer is up-to-date and false,
* with the buffer locked, if not.
*/
int bh_uptodate_or_lock(struct buffer_head *bh)
{
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
if (!buffer_uptodate(bh))
return 0;
unlock_buffer(bh);
}
return 1;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);
/**
* bh_submit_read: Submit a locked buffer for reading
* @bh: struct buffer_head
*
* Returns zero on success and -EIO on error.
*/
int bh_submit_read(struct buffer_head *bh)
{
BUG_ON(!buffer_locked(bh));
if (buffer_uptodate(bh)) {
unlock_buffer(bh);
return 0;
}
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return 0;
return -EIO;
}
EXPORT_SYMBOL(bh_submit_read);
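Taken together, these two helpers implement the read-and-validate pattern that read_block_bitmap() below switches to; a minimal caller sketch (hypothetical function name, assuming only the two APIs added above):

	static int read_bh_if_stale(struct buffer_head *bh)
	{
		/* bh_uptodate_or_lock() returns 1 if already uptodate;
		 * otherwise it returns 0 with the buffer locked */
		if (bh_uptodate_or_lock(bh))
			return 0;
		/* buffer is locked and stale: read it synchronously;
		 * bh_submit_read() unlocks it via end_buffer_read_sync() */
		return bh_submit_read(bh);	/* 0 on success, -EIO on error */
	}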
void __init buffer_init(void)
{
int nrpages;
......
......@@ -680,11 +680,31 @@ static int ext2_check_descriptors (struct super_block * sb)
static loff_t ext2_max_size(int bits)
{
loff_t res = EXT2_NDIR_BLOCKS;
/* This constant is calculated to be the largest file size for a
* dense, 4k-blocksize file such that the total number of
int meta_blocks;
loff_t upper_limit;
/* This is calculated to be the largest file size for a
* dense file such that the total number of
* sectors in the file, including data and all indirect blocks,
* does not exceed 2^32. */
const loff_t upper_limit = 0x1ff7fffd000LL;
* does not exceed 2^32 - 1.
* __u32 i_blocks represents the total number of
* 512-byte blocks of the file
*/
upper_limit = (1LL << 32) - 1;
/* total blocks in file system block size */
upper_limit >>= (bits - 9);
/* indirect blocks */
meta_blocks = 1;
/* double indirect blocks */
meta_blocks += 1 + (1LL << (bits-2));
/* triple indirect blocks */
meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
upper_limit -= meta_blocks;
upper_limit <<= bits;
res += 1LL << (bits-2);
res += 1LL << (2*(bits-2));
......@@ -692,6 +712,10 @@ static loff_t ext2_max_size(int bits)
res <<= bits;
if (res > upper_limit)
res = upper_limit;
if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
return res;
}
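As a sanity check of the new computation (mirroring the code above for bits = 12, i.e. 4 KiB blocks): upper_limit starts at 2^32 - 1 = 4294967295 sectors, shifted right by bits - 9 = 3 to give 536870911 blocks; meta_blocks = 1 + (1 + 2^10) + (1 + 2^10 + 2^20) = 1050627; so the limit is (536870911 - 1050627) << 12 = 2194719883264 bytes, just under 2 TiB, replacing the old hard-coded upper_limit of 0x1ff7fffd000.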
......
......@@ -1436,11 +1436,31 @@ static void ext3_orphan_cleanup (struct super_block * sb,
static loff_t ext3_max_size(int bits)
{
loff_t res = EXT3_NDIR_BLOCKS;
/* This constant is calculated to be the largest file size for a
* dense, 4k-blocksize file such that the total number of
int meta_blocks;
loff_t upper_limit;
/* This is calculated to be the largest file size for a
* dense file such that the total number of
* sectors in the file, including data and all indirect blocks,
* does not exceed 2^32. */
const loff_t upper_limit = 0x1ff7fffd000LL;
* does not exceed 2^32 - 1.
* __u32 i_blocks represents the total number of
* 512-byte blocks of the file
*/
upper_limit = (1LL << 32) - 1;
/* total blocks in file system block size */
upper_limit >>= (bits - 9);
/* indirect blocks */
meta_blocks = 1;
/* double indirect blocks */
meta_blocks += 1 + (1LL << (bits-2));
/* triple indirect blocks */
meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
upper_limit -= meta_blocks;
upper_limit <<= bits;
res += 1LL << (bits-2);
res += 1LL << (2*(bits-2));
......@@ -1448,6 +1468,10 @@ static loff_t ext3_max_size(int bits)
res <<= bits;
if (res > upper_limit)
res = upper_limit;
if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
return res;
}
......
......@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o
ext4_jbd2.o migrate.o mballoc.o
ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
......
......@@ -29,7 +29,7 @@
* Calculate the block group number and offset, given a block number
*/
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
unsigned long *blockgrpp, ext4_grpblk_t *offsetp)
ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
{
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
ext4_grpblk_t offset;
......@@ -46,7 +46,7 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
/* Initializes an uninitialized block bitmap if given, and returns the
* number of blocks free in the group. */
unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
int block_group, struct ext4_group_desc *gdp)
ext4_group_t block_group, struct ext4_group_desc *gdp)
{
unsigned long start;
int bit, bit_max;
......@@ -60,7 +60,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
* essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
ext4_error(sb, __FUNCTION__,
"Checksum bad for group %u\n", block_group);
"Checksum bad for group %lu\n", block_group);
gdp->bg_free_blocks_count = 0;
gdp->bg_free_inodes_count = 0;
gdp->bg_itable_unused = 0;
......@@ -153,7 +153,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
* group descriptor
*/
struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
unsigned int block_group,
ext4_group_t block_group,
struct buffer_head ** bh)
{
unsigned long group_desc;
......@@ -164,7 +164,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
if (block_group >= sbi->s_groups_count) {
ext4_error (sb, "ext4_get_group_desc",
"block_group >= groups_count - "
"block_group = %d, groups_count = %lu",
"block_group = %lu, groups_count = %lu",
block_group, sbi->s_groups_count);
return NULL;
......@@ -176,7 +176,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
if (!sbi->s_group_desc[group_desc]) {
ext4_error (sb, "ext4_get_group_desc",
"Group descriptor not loaded - "
"block_group = %d, group_desc = %lu, desc = %lu",
"block_group = %lu, group_desc = %lu, desc = %lu",
block_group, group_desc, offset);
return NULL;
}
......@@ -189,18 +189,70 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
return desc;
}
static int ext4_valid_block_bitmap(struct super_block *sb,
struct ext4_group_desc *desc,
unsigned int block_group,
struct buffer_head *bh)
{
ext4_grpblk_t offset;
ext4_grpblk_t next_zero_bit;
ext4_fsblk_t bitmap_blk;
ext4_fsblk_t group_first_block;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
/* with FLEX_BG, the inode/block bitmaps and itable
* blocks may not be in the group at all
* so the bitmap validation will be skipped for those groups
* or it has to also read the block group where the bitmaps
* are located to verify they are set.
*/
return 1;
}
group_first_block = ext4_group_first_block_no(sb, block_group);
/* check whether block bitmap block number is set */
bitmap_blk = ext4_block_bitmap(sb, desc);
offset = bitmap_blk - group_first_block;
if (!ext4_test_bit(offset, bh->b_data))
/* bad block bitmap */
goto err_out;
/* check whether the inode bitmap block number is set */
bitmap_blk = ext4_inode_bitmap(sb, desc);
offset = bitmap_blk - group_first_block;
if (!ext4_test_bit(offset, bh->b_data))
/* bad block bitmap */
goto err_out;
/* check whether the inode table block number is set */
bitmap_blk = ext4_inode_table(sb, desc);
offset = bitmap_blk - group_first_block;
next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
offset + EXT4_SB(sb)->s_itb_per_group,
offset);
if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group)
/* good bitmap for inode tables */
return 1;
err_out:
ext4_error(sb, __FUNCTION__,
"Invalid block bitmap - "
"block_group = %d, block = %llu",
block_group, bitmap_blk);
return 0;
}
/**
* read_block_bitmap()
* @sb: super block
* @block_group: given block group
*
* Read the bitmap for a given block_group, reading into the specified
* slot in the superblock's bitmap cache.
* Read the bitmap for a given block_group,and validate the
* bits for block/inode/inode tables are set in the bitmaps
*
* Return buffer_head on success or NULL in case of failure.
*/
struct buffer_head *
read_block_bitmap(struct super_block *sb, unsigned int block_group)
read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
{
struct ext4_group_desc * desc;
struct buffer_head * bh = NULL;
......@@ -210,25 +262,36 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
if (!desc)
return NULL;
bitmap_blk = ext4_block_bitmap(sb, desc);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
ext4_error(sb, __FUNCTION__,
"Cannot read block bitmap - "
"block_group = %d, block_bitmap = %llu",
(int)block_group, (unsigned long long)bitmap_blk);
return NULL;
}
if (bh_uptodate_or_lock(bh))
return bh;
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
bh = sb_getblk(sb, bitmap_blk);
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
if (!buffer_uptodate(bh)) {
ext4_init_block_bitmap(sb, bh, block_group,
desc);
set_buffer_uptodate(bh);
}
unlock_buffer(bh);
}
} else {
bh = sb_bread(sb, bitmap_blk);
ext4_init_block_bitmap(sb, bh, block_group, desc);
set_buffer_uptodate(bh);
unlock_buffer(bh);
return bh;
}
if (!bh)
ext4_error (sb, __FUNCTION__,
if (bh_submit_read(bh) < 0) {
put_bh(bh);
ext4_error(sb, __FUNCTION__,
"Cannot read block bitmap - "
"block_group = %d, block_bitmap = %llu",
block_group, bitmap_blk);
(int)block_group, (unsigned long long)bitmap_blk);
return NULL;
}
if (!ext4_valid_block_bitmap(sb, desc, block_group, bh)) {
put_bh(bh);
return NULL;
}
return bh;
}
/*
......@@ -320,7 +383,7 @@ static void __rsv_window_dump(struct rb_root *root, int verbose,
*/
static int
goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
unsigned int group, struct super_block * sb)
ext4_group_t group, struct super_block *sb)
{
ext4_fsblk_t group_first_block, group_last_block;
......@@ -463,7 +526,7 @@ static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
* when setting the reservation window size through ioctl before the file
* is open for write (needs block allocation).
*
* Needs truncate_mutex protection prior to call this function.
* Needs down_write(i_data_sem) protection before calling this function.
*/
void ext4_init_block_alloc_info(struct inode *inode)
{
......@@ -514,6 +577,8 @@ void ext4_discard_reservation(struct inode *inode)
struct ext4_reserve_window_node *rsv;
spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;
ext4_mb_discard_inode_preallocations(inode);
if (!block_i)
return;
......@@ -540,7 +605,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
{
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *gd_bh;
unsigned long block_group;
ext4_group_t block_group;
ext4_grpblk_t bit;
unsigned long i;
unsigned long overflow;
......@@ -587,11 +652,13 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
in_range(ext4_inode_bitmap(sb, desc), block, count) ||
in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
in_range(block + count - 1, ext4_inode_table(sb, desc),
sbi->s_itb_per_group))
sbi->s_itb_per_group)) {
ext4_error (sb, "ext4_free_blocks",
"Freeing blocks in system zones - "
"Block = %llu, count = %lu",
block, count);
goto error_return;
}
/*
* We are about to start releasing blocks in the bitmap,
......@@ -720,19 +787,29 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
* @inode: inode
* @block: start physical block to free
* @count: number of blocks to free
* @metadata: Are these metadata blocks
*/
void ext4_free_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t block, unsigned long count)
ext4_fsblk_t block, unsigned long count,
int metadata)
{
struct super_block * sb;
unsigned long dquot_freed_blocks;
/* this isn't the right place to decide whether block is metadata
* inode.c/extents.c knows better, but for safety ... */
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
ext4_should_journal_data(inode))
metadata = 1;
sb = inode->i_sb;
if (!sb) {
printk ("ext4_free_blocks: nonexistent device");
return;
}
ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info)
ext4_free_blocks_sb(handle, sb, block, count,
&dquot_freed_blocks);
else
ext4_mb_free_blocks(handle, inode, block, count,
metadata, &dquot_freed_blocks);
if (dquot_freed_blocks)
DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
return;
......@@ -920,9 +997,10 @@ claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh)
* ext4_journal_release_buffer(), else we'll run out of credits.
*/
static ext4_grpblk_t
ext4_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
struct buffer_head *bitmap_bh, ext4_grpblk_t grp_goal,
unsigned long *count, struct ext4_reserve_window *my_rsv)
ext4_try_to_allocate(struct super_block *sb, handle_t *handle,
ext4_group_t group, struct buffer_head *bitmap_bh,
ext4_grpblk_t grp_goal, unsigned long *count,
struct ext4_reserve_window *my_rsv)
{
ext4_fsblk_t group_first_block;
ext4_grpblk_t start, end;
......@@ -1156,7 +1234,7 @@ static int find_next_reservable_window(
*/
static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
ext4_grpblk_t grp_goal, struct super_block *sb,
unsigned int group, struct buffer_head *bitmap_bh)
ext4_group_t group, struct buffer_head *bitmap_bh)
{
struct ext4_reserve_window_node *search_head;
ext4_fsblk_t group_first_block, group_end_block, start_block;
......@@ -1354,7 +1432,7 @@ static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
*/
static ext4_grpblk_t
ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
unsigned int group, struct buffer_head *bitmap_bh,
ext4_group_t group, struct buffer_head *bitmap_bh,
ext4_grpblk_t grp_goal,
struct ext4_reserve_window_node * my_rsv,
unsigned long *count, int *errp)
......@@ -1510,7 +1588,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
}
/**
* ext4_new_blocks() -- core block(s) allocation function
* ext4_new_blocks_old() -- core block(s) allocation function
* @handle: handle to this transaction
* @inode: file inode
* @goal: given target block(filesystem wide)
......@@ -1523,17 +1601,17 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
* any specific goal block.
*
*/
ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp)
{
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *gdp_bh;
unsigned long group_no;
int goal_group;
ext4_group_t group_no;
ext4_group_t goal_group;
ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */
ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/
ext4_fsblk_t ret_block; /* filesystem-wide allocated block */
int bgi; /* blockgroup iteration index */
ext4_group_t bgi; /* blockgroup iteration index */
int fatal = 0, err;
int performed_allocation = 0;
ext4_grpblk_t free_blocks; /* number of free blocks in a group */
......@@ -1544,10 +1622,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
struct ext4_reserve_window_node *my_rsv = NULL;
struct ext4_block_alloc_info *block_i;
unsigned short windowsz = 0;
#ifdef EXT4FS_DEBUG
static int goal_hits, goal_attempts;
#endif
unsigned long ngroups;
ext4_group_t ngroups;
unsigned long num = *count;
*errp = -ENOSPC;
......@@ -1567,7 +1642,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
sbi = EXT4_SB(sb);
es = EXT4_SB(sb)->s_es;
ext4_debug("goal=%lu.\n", goal);
ext4_debug("goal=%llu.\n", goal);
/*
* Allocate a block from reservation only when
* filesystem is mounted with reservation(default,-o reservation), and
......@@ -1677,7 +1752,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
allocated:
ext4_debug("using block group %d(%d)\n",
ext4_debug("using block group %lu(%d)\n",
group_no, gdp->bg_free_blocks_count);
BUFFER_TRACE(gdp_bh, "get_write_access");
......@@ -1692,11 +1767,13 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
in_range(ret_block, ext4_inode_table(sb, gdp),
EXT4_SB(sb)->s_itb_per_group) ||
in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
EXT4_SB(sb)->s_itb_per_group))
EXT4_SB(sb)->s_itb_per_group)) {
ext4_error(sb, "ext4_new_block",
"Allocating block in system zone - "
"blocks from %llu, length %lu",
ret_block, num);
goto out;
}
performed_allocation = 1;
......@@ -1743,9 +1820,6 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
* list of some description. We don't know in advance whether
* the caller wants to use it as metadata or data.
*/
ext4_debug("allocating block %lu. Goal hits %d of %d.\n",
ret_block, goal_hits, goal_attempts);
spin_lock(sb_bgl_lock(sbi, group_no));
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
......@@ -1787,13 +1861,46 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
}
ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, int *errp)
ext4_fsblk_t goal, int *errp)
{
unsigned long count = 1;
struct ext4_allocation_request ar;
ext4_fsblk_t ret;
return ext4_new_blocks(handle, inode, goal, &count, errp);
if (!test_opt(inode->i_sb, MBALLOC)) {
unsigned long count = 1;
ret = ext4_new_blocks_old(handle, inode, goal, &count, errp);
return ret;
}
memset(&ar, 0, sizeof(ar));
ar.inode = inode;
ar.goal = goal;
ar.len = 1;
ret = ext4_mb_new_blocks(handle, &ar, errp);
return ret;
}
ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp)
{
struct ext4_allocation_request ar;
ext4_fsblk_t ret;
if (!test_opt(inode->i_sb, MBALLOC)) {
ret = ext4_new_blocks_old(handle, inode, goal, count, errp);
return ret;
}
memset(&ar, 0, sizeof(ar));
ar.inode = inode;
ar.goal = goal;
ar.len = *count;
ret = ext4_mb_new_blocks(handle, &ar, errp);
*count = ar.len;
return ret;
}
/**
* ext4_count_free_blocks() -- count filesystem free blocks
* @sb: superblock
......@@ -1804,8 +1911,8 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
{
ext4_fsblk_t desc_count;
struct ext4_group_desc *gdp;
int i;
unsigned long ngroups = EXT4_SB(sb)->s_groups_count;
ext4_group_t i;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
#ifdef EXT4FS_DEBUG
struct ext4_super_block *es;
ext4_fsblk_t bitmap_count;
......@@ -1829,14 +1936,14 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
continue;
x = ext4_count_free(bitmap_bh, sb->s_blocksize);
printk("group %d: stored = %d, counted = %lu\n",
printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
i, le16_to_cpu(gdp->bg_free_blocks_count), x);
bitmap_count += x;
}
brelse(bitmap_bh);
printk("ext4_count_free_blocks: stored = %llu"
", computed = %llu, %llu\n",
EXT4_FREE_BLOCKS_COUNT(es),
ext4_free_blocks_count(es),
desc_count, bitmap_count);
return bitmap_count;
#else
......@@ -1853,7 +1960,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
#endif
}
static inline int test_root(int a, int b)
static inline int test_root(ext4_group_t a, int b)
{
int num = b;
......@@ -1862,7 +1969,7 @@ static inline int test_root(int a, int b)
return num == a;
}
static int ext4_group_sparse(int group)
static int ext4_group_sparse(ext4_group_t group)
{
if (group <= 1)
return 1;
......@@ -1880,7 +1987,7 @@ static int ext4_group_sparse(int group)
* Return the number of blocks used by the superblock (primary or backup)
* in this group. Currently this will be only 0 or 1.
*/
int ext4_bg_has_super(struct super_block *sb, int group)
int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)
{
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
......@@ -1889,18 +1996,20 @@ int ext4_bg_has_super(struct super_block *sb, int group)
return 1;
}
static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, int group)
static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
ext4_group_t group)
{
unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
unsigned long first = metagroup * EXT4_DESC_PER_BLOCK(sb);
unsigned long last = first + EXT4_DESC_PER_BLOCK(sb) - 1;
ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb);
ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1;
if (group == first || group == first + 1 || group == last)
return 1;
return 0;
}
static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group)
static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
ext4_group_t group)
{
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
......@@ -1918,7 +2027,7 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group)
* (primary or backup) in this group. In the future there may be a
* different number of descriptor blocks in each group.
*/
unsigned long ext4_bg_num_gdb(struct super_block *sb, int group)
unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
{
unsigned long first_meta_bg =
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
......
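For reference, with EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER set (the common case), ext4_bg_has_super() reports a backup superblock only in group 0, group 1, and groups that are powers of 3, 5 or 7 (test_root() above performs the power test), so groups 0, 1, 3, 5, 7, 9, 25, 27, 49, ... carry copies.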
......@@ -67,7 +67,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
unsigned long offset)
{
const char * error_msg = NULL;
const int rlen = le16_to_cpu(de->rec_len);
const int rlen = ext4_rec_len_from_disk(de->rec_len);
if (rlen < EXT4_DIR_REC_LEN(1))
error_msg = "rec_len is smaller than minimal";
......@@ -124,7 +124,7 @@ static int ext4_readdir(struct file * filp,
offset = filp->f_pos & (sb->s_blocksize - 1);
while (!error && !stored && filp->f_pos < inode->i_size) {
unsigned long blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
struct buffer_head map_bh;
struct buffer_head *bh = NULL;
......@@ -172,10 +172,10 @@ static int ext4_readdir(struct file * filp,
* least that it is non-zero. A
* failure will be detected in the
* dirent test below. */
if (le16_to_cpu(de->rec_len) <
EXT4_DIR_REC_LEN(1))
if (ext4_rec_len_from_disk(de->rec_len)
< EXT4_DIR_REC_LEN(1))
break;
i += le16_to_cpu(de->rec_len);
i += ext4_rec_len_from_disk(de->rec_len);
}
offset = i;
filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
......@@ -197,7 +197,7 @@ static int ext4_readdir(struct file * filp,
ret = stored;
goto out;
}
offset += le16_to_cpu(de->rec_len);
offset += ext4_rec_len_from_disk(de->rec_len);
if (le32_to_cpu(de->inode)) {
/* We might block in the next section
* if the data destination is
......@@ -219,7 +219,7 @@ static int ext4_readdir(struct file * filp,
goto revalidate;
stored ++;
}
filp->f_pos += le16_to_cpu(de->rec_len);
filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
}
offset = 0;
brelse (bh);
......
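ext4_rec_len_from_disk(), used throughout this hunk, lives in the ext4 headers rather than in this file; a sketch of its likely shape in this series (assuming the encoding where the reserved on-disk value EXT4_MAX_REC_LEN, 65535, stands for an entry spanning a full 64 KiB block):

	static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
	{
		unsigned len = le16_to_cpu(dlen);

		/* 65535 is reserved to mean "rec_len = 65536" so a single
		 * directory entry can cover a 64 KiB block */
		if (len == EXT4_MAX_REC_LEN)
			return 1 << 16;
		return len;
	}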
This diff is collapsed.
......@@ -37,9 +37,9 @@ static int ext4_release_file (struct inode * inode, struct file * filp)
if ((filp->f_mode & FMODE_WRITE) &&
(atomic_read(&inode->i_writecount) == 1))
{
mutex_lock(&EXT4_I(inode)->truncate_mutex);
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_reservation(inode);
mutex_unlock(&EXT4_I(inode)->truncate_mutex);
up_write(&EXT4_I(inode)->i_data_sem);
}
if (is_dx(inode) && filp->private_data)
ext4_htree_free_dir_info(filp->private_data);
......@@ -56,8 +56,25 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
ssize_t ret;
int err;
ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
/*
* If we have encountered a bitmap-format file, the size limit
* is smaller than s_maxbytes, which is for extent-mapped files.
*/
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
size_t length = iov_length(iov, nr_segs);
if (pos > sbi->s_bitmap_maxbytes)
return -EFBIG;
if (pos + length > sbi->s_bitmap_maxbytes) {
nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
sbi->s_bitmap_maxbytes - pos);
}
}
ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
/*
* Skip flushing if there was an error, or if nothing was written.
*/
......
......@@ -14,14 +14,16 @@ extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
struct ext4_group_desc *gdp);
struct buffer_head *read_block_bitmap(struct super_block *sb,
unsigned int block_group);
ext4_group_t block_group);
extern unsigned ext4_init_block_bitmap(struct super_block *sb,
struct buffer_head *bh, int group,
struct buffer_head *bh,
ext4_group_t group,
struct ext4_group_desc *desc);
#define ext4_free_blocks_after_init(sb, group, desc) \
ext4_init_block_bitmap(sb, NULL, group, desc)
extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
struct buffer_head *bh, int group,
struct buffer_head *bh,
ext4_group_t group,
struct ext4_group_desc *desc);
extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
#endif /* _LINUX_EXT4_GROUP_H */
......@@ -64,8 +64,8 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
}
/* Initializes an uninitialized inode bitmap */
unsigned ext4_init_inode_bitmap(struct super_block *sb,
struct buffer_head *bh, int block_group,
unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
......@@ -75,7 +75,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb,
/* If checksum is bad mark all blocks and inodes use to prevent
* allocation, essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n",
block_group);
gdp->bg_free_blocks_count = 0;
gdp->bg_free_inodes_count = 0;
......@@ -98,7 +98,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb,
* Return buffer_head of bitmap on success or NULL.
*/
static struct buffer_head *
read_inode_bitmap(struct super_block * sb, unsigned long block_group)
read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
{
struct ext4_group_desc *desc;
struct buffer_head *bh = NULL;
......@@ -152,7 +152,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
unsigned long ino;
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2;
unsigned long block_group;
ext4_group_t block_group;
unsigned long bit;
struct ext4_group_desc * gdp;
struct ext4_super_block * es;
......@@ -260,12 +260,14 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
* For other inodes, search forward from the parent directory's block
* group to find a free inode.
*/
static int find_group_dir(struct super_block *sb, struct inode *parent)
static int find_group_dir(struct super_block *sb, struct inode *parent,
ext4_group_t *best_group)
{
int ngroups = EXT4_SB(sb)->s_groups_count;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
unsigned int freei, avefreei;
struct ext4_group_desc *desc, *best_desc = NULL;
int group, best_group = -1;
ext4_group_t group;
int ret = -1;
freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter);
avefreei = freei / ngroups;
......@@ -279,11 +281,12 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
if (!best_desc ||
(le16_to_cpu(desc->bg_free_blocks_count) >
le16_to_cpu(best_desc->bg_free_blocks_count))) {
best_group = group;
*best_group = group;
best_desc = desc;
ret = 0;
}
}
return best_group;
return ret;
}
/*
......@@ -314,12 +317,13 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
#define INODE_COST 64
#define BLOCK_COST 256
static int find_group_orlov(struct super_block *sb, struct inode *parent)
static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4_group_t *group)
{
int parent_group = EXT4_I(parent)->i_block_group;
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
int ngroups = sbi->s_groups_count;
ext4_group_t ngroups = sbi->s_groups_count;
int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
unsigned int freei, avefreei;
ext4_fsblk_t freeb, avefreeb;
......@@ -327,7 +331,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
unsigned int ndirs;
int max_debt, max_dirs, min_inodes;
ext4_grpblk_t min_blocks;
int group = -1, i;
ext4_group_t i;
struct ext4_group_desc *desc;
freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
......@@ -340,13 +344,14 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
if ((parent == sb->s_root->d_inode) ||
(EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) {
int best_ndir = inodes_per_group;
int best_group = -1;
ext4_group_t grp;
int ret = -1;
get_random_bytes(&group, sizeof(group));
parent_group = (unsigned)group % ngroups;
get_random_bytes(&grp, sizeof(grp));
parent_group = (unsigned)grp % ngroups;
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc (sb, group, NULL);
grp = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, grp, NULL);
if (!desc || !desc->bg_free_inodes_count)
continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
......@@ -355,11 +360,12 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
continue;
if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
continue;
best_group = group;
*group = grp;
ret = 0;
best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
}
if (best_group >= 0)
return best_group;
if (ret == 0)
return ret;
goto fallback;
}
......@@ -380,8 +386,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
max_debt = 1;
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc (sb, group, NULL);
*group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, *group, NULL);
if (!desc || !desc->bg_free_inodes_count)
continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
......@@ -390,17 +396,16 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
continue;
if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
continue;
return group;
return 0;
}
fallback:
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc (sb, group, NULL);
if (!desc || !desc->bg_free_inodes_count)
continue;
if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
return group;
*group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, *group, NULL);
if (desc && desc->bg_free_inodes_count &&
le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
return 0;
}
if (avefreei) {
......@@ -415,21 +420,22 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
return -1;
}
static int find_group_other(struct super_block *sb, struct inode *parent)
static int find_group_other(struct super_block *sb, struct inode *parent,
ext4_group_t *group)
{
int parent_group = EXT4_I(parent)->i_block_group;
int ngroups = EXT4_SB(sb)->s_groups_count;
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
struct ext4_group_desc *desc;
int group, i;
ext4_group_t i;
/*
* Try to place the inode in its parent directory
*/
group = parent_group;
desc = ext4_get_group_desc (sb, group, NULL);
*group = parent_group;
desc = ext4_get_group_desc(sb, *group, NULL);
if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
le16_to_cpu(desc->bg_free_blocks_count))
return group;
return 0;
/*
* We're going to place this inode in a different blockgroup from its
......@@ -440,33 +446,33 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
*
* So add our directory's i_ino into the starting point for the hash.
*/
group = (group + parent->i_ino) % ngroups;
*group = (*group + parent->i_ino) % ngroups;
/*
* Use a quadratic hash to find a group with a free inode and some free
* blocks.
*/
for (i = 1; i < ngroups; i <<= 1) {
group += i;
if (group >= ngroups)
group -= ngroups;
desc = ext4_get_group_desc (sb, group, NULL);
*group += i;
if (*group >= ngroups)
*group -= ngroups;
desc = ext4_get_group_desc(sb, *group, NULL);
if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
le16_to_cpu(desc->bg_free_blocks_count))
return group;
return 0;
}
/*
* That failed: try linear search for a free inode, even if that group
* has no free blocks.
*/
group = parent_group;
*group = parent_group;
for (i = 0; i < ngroups; i++) {
if (++group >= ngroups)
group = 0;
desc = ext4_get_group_desc (sb, group, NULL);
if (++*group >= ngroups)
*group = 0;
desc = ext4_get_group_desc(sb, *group, NULL);
if (desc && le16_to_cpu(desc->bg_free_inodes_count))
return group;
return 0;
}
return -1;
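Note the API change running through all three find_group_* helpers: the chosen group is now returned through an ext4_group_t pointer and the return value carries only success or failure, because ext4_group_t is unsigned and the old convention of returning -1 in the group value itself can no longer be represented.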
......@@ -487,16 +493,17 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
struct super_block *sb;
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2;
int group;
ext4_group_t group = 0;
unsigned long ino = 0;
struct inode * inode;
struct ext4_group_desc * gdp = NULL;
struct ext4_super_block * es;
struct ext4_inode_info *ei;
struct ext4_sb_info *sbi;
int err = 0;
int ret2, err = 0;
struct inode *ret;
int i, free = 0;
ext4_group_t i;
int free = 0;
/* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink)
......@@ -512,14 +519,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
es = sbi->s_es;
if (S_ISDIR(mode)) {
if (test_opt (sb, OLDALLOC))
group = find_group_dir(sb, dir);
ret2 = find_group_dir(sb, dir, &group);
else
group = find_group_orlov(sb, dir);
ret2 = find_group_orlov(sb, dir, &group);
} else
group = find_group_other(sb, dir);
ret2 = find_group_other(sb, dir, &group);
err = -ENOSPC;
if (group == -1)
if (ret2 == -1)
goto out;
for (i = 0; i < sbi->s_groups_count; i++) {
......@@ -583,7 +590,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
ino > EXT4_INODES_PER_GROUP(sb)) {
ext4_error(sb, __FUNCTION__,
"reserved inode or inode > inodes count - "
"block_group = %d, inode=%lu", group,
"block_group = %lu, inode=%lu", group,
ino + group * EXT4_INODES_PER_GROUP(sb));
err = -EIO;
goto fail;
......@@ -702,7 +709,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
if (!S_ISDIR(mode))
ei->i_flags &= ~EXT4_DIRSYNC_FL;
ei->i_file_acl = 0;
ei->i_dir_acl = 0;
ei->i_dtime = 0;
ei->i_block_alloc_info = NULL;
ei->i_block_group = group;
......@@ -741,13 +747,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
if (test_opt(sb, EXTENTS)) {
EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
ext4_ext_tree_init(handle, inode);
if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
if (err) goto fail;
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS);
BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "call ext4_journal_dirty_metadata");
err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
}
err = ext4_update_incompat_feature(handle, sb,
EXT4_FEATURE_INCOMPAT_EXTENTS);
if (err)
goto fail;
}
ext4_debug("allocating inode %lu\n", inode->i_ino);
......@@ -777,7 +780,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
{
unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
unsigned long block_group;
ext4_group_t block_group;
int bit;
struct buffer_head *bitmap_bh = NULL;
struct inode *inode = NULL;
......@@ -833,7 +836,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
{
unsigned long desc_count;
struct ext4_group_desc *gdp;
int i;
ext4_group_t i;
#ifdef EXT4FS_DEBUG
struct ext4_super_block *es;
unsigned long bitmap_count, x;
......@@ -854,7 +857,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
continue;
x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
printk("group %d: stored = %d, counted = %lu\n",
printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
i, le16_to_cpu(gdp->bg_free_inodes_count), x);
bitmap_count += x;
}
......@@ -879,7 +882,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
unsigned long ext4_count_dirs (struct super_block * sb)
{
unsigned long count = 0;
int i;
ext4_group_t i;
for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL);
......
This diff is collapsed.
......@@ -199,7 +199,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
* need to allocate reservation structure for this inode
* before set the window size
*/
mutex_lock(&ei->truncate_mutex);
down_write(&ei->i_data_sem);
if (!ei->i_block_alloc_info)
ext4_init_block_alloc_info(inode);
......@@ -207,7 +207,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
rsv->rsv_goal_size = rsv_window_size;
}
mutex_unlock(&ei->truncate_mutex);
up_write(&ei->i_data_sem);
return 0;
}
case EXT4_IOC_GROUP_EXTEND: {
......@@ -254,6 +254,9 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
return err;
}
case EXT4_IOC_MIGRATE:
return ext4_ext_migrate(inode, filp, cmd, arg);
default:
return -ENOTTY;
}
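From userspace the new ioctl takes no argument; a hypothetical invocation sketch (the EXT4_IOC_MIGRATE value below is an assumption for illustration; use the kernel's ext4_fs.h definition):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>

	#ifndef EXT4_IOC_MIGRATE
	#define EXT4_IOC_MIGRATE _IO('f', 9)	/* assumed value; check ext4_fs.h */
	#endif

	int main(int argc, char **argv)
	{
		int fd = open(argv[1], O_RDWR);	/* file to migrate to extents */
		if (fd < 0 || ioctl(fd, EXT4_IOC_MIGRATE) < 0)
			perror("EXT4_IOC_MIGRATE");
		return 0;
	}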
......
This diff is collapsed.
/*
* Copyright IBM Corporation, 2007
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
*/
#include <linux/module.h>
#include <linux/ext4_jbd2.h>
#include <linux/ext4_fs_extents.h>
/*
* The details of contiguous blocks which can be
* represented by a single extent
*/
struct list_blocks_struct {
ext4_lblk_t first_block, last_block;
ext4_fsblk_t first_pblock, last_pblock;
};
static int finish_range(handle_t *handle, struct inode *inode,
struct list_blocks_struct *lb)
{
int retval = 0, needed;
struct ext4_extent newext;
struct ext4_ext_path *path;
if (lb->first_pblock == 0)
return 0;
/* Add the extent to temp inode*/
newext.ee_block = cpu_to_le32(lb->first_block);
newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
ext4_ext_store_pblock(&newext, lb->first_pblock);
path = ext4_ext_find_extent(inode, lb->first_block, NULL);
if (IS_ERR(path)) {
retval = PTR_ERR(path);
goto err_out;
}
/*
* Calculate the credit needed for inserting this extent.
* Since we are doing this in a loop we may accumulate extra
* credit. But below we try not to accumulate too much
* of it by restarting the journal.
*/
needed = ext4_ext_calc_credits_for_insert(inode, path);
/*
* Make sure the credit we accumulated is not really high
*/
if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
retval = ext4_journal_restart(handle, needed);
if (retval)
goto err_out;
}
if (needed) {
retval = ext4_journal_extend(handle, needed);
if (retval != 0) {
/*
* If not able to extend the journal, restart the journal
*/
retval = ext4_journal_restart(handle, needed);
if (retval)
goto err_out;
}
}
retval = ext4_ext_insert_extent(handle, inode, path, &newext);
err_out:
lb->first_pblock = 0;
return retval;
}
static int update_extent_range(handle_t *handle, struct inode *inode,
ext4_fsblk_t pblock, ext4_lblk_t blk_num,
struct list_blocks_struct *lb)
{
int retval;
/*
* See if we can add on to the existing range (if it exists)
*/
if (lb->first_pblock &&
(lb->last_pblock+1 == pblock) &&
(lb->last_block+1 == blk_num)) {
lb->last_pblock = pblock;
lb->last_block = blk_num;
return 0;
}
/*
* Start a new range.
*/
retval = finish_range(handle, inode, lb);
lb->first_pblock = lb->last_pblock = pblock;
lb->first_block = lb->last_block = blk_num;
return retval;
}
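For example, if the first five file blocks map to physical blocks 100, 101, 102, 250, 251, the walk extends a single range to cover logical blocks 0-2 at physical 100, then finish_range() flushes it as one extent and a new range starts at logical block 3 -> 250.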
static int update_ind_extent_range(handle_t *handle, struct inode *inode,
ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
struct list_blocks_struct *lb)
{
struct buffer_head *bh;
__le32 *i_data;
int i, retval = 0;
ext4_lblk_t blk_count = *blk_nump;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
if (!pblock) {
/* Only update the file block number */
*blk_nump += max_entries;
return 0;
}
bh = sb_bread(inode->i_sb, pblock);
if (!bh)
return -EIO;
i_data = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++, blk_count++) {
if (i_data[i]) {
retval = update_extent_range(handle, inode,
le32_to_cpu(i_data[i]),
blk_count, lb);
if (retval)
break;
}
}
/* Update the file block number */
*blk_nump = blk_count;
put_bh(bh);
return retval;
}
static int update_dind_extent_range(handle_t *handle, struct inode *inode,
ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
struct list_blocks_struct *lb)
{
struct buffer_head *bh;
__le32 *i_data;
int i, retval = 0;
ext4_lblk_t blk_count = *blk_nump;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
if (!pblock) {
/* Only update the file block number */
*blk_nump += max_entries * max_entries;
return 0;
}
bh = sb_bread(inode->i_sb, pblock);
if (!bh)
return -EIO;
i_data = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
if (i_data[i]) {
retval = update_ind_extent_range(handle, inode,
le32_to_cpu(i_data[i]),
&blk_count, lb);
if (retval)
break;
} else {
/* Only update the file block number */
blk_count += max_entries;
}
}
/* Update the file block number */
*blk_nump = blk_count;
put_bh(bh);
return retval;
}
static int update_tind_extent_range(handle_t *handle, struct inode *inode,
ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
struct list_blocks_struct *lb)
{
struct buffer_head *bh;
__le32 *i_data;
int i, retval = 0;
ext4_lblk_t blk_count = *blk_nump;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
if (!pblock) {
/* Only update the file block number */
*blk_nump += max_entries * max_entries * max_entries;
return 0;
}
bh = sb_bread(inode->i_sb, pblock);
if (!bh)
return -EIO;
i_data = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
if (i_data[i]) {
retval = update_dind_extent_range(handle, inode,
le32_to_cpu(i_data[i]),
&blk_count, lb);
if (retval)
break;
} else
/* Only update the file block number */
blk_count += max_entries * max_entries;
}
/* Update the file block number */
*blk_nump = blk_count;
put_bh(bh);
return retval;
}
static int free_dind_blocks(handle_t *handle,
struct inode *inode, __le32 i_data)
{
int i;
__le32 *tmp_idata;
struct buffer_head *bh;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
if (!bh)
return -EIO;
tmp_idata = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
if (tmp_idata[i])
ext4_free_blocks(handle, inode,
le32_to_cpu(tmp_idata[i]), 1, 1);
}
put_bh(bh);
ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
return 0;
}
static int free_tind_blocks(handle_t *handle,
struct inode *inode, __le32 i_data)
{
int i, retval = 0;
__le32 *tmp_idata;
struct buffer_head *bh;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
if (!bh)
return -EIO;
tmp_idata = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
if (tmp_idata[i]) {
retval = free_dind_blocks(handle,
inode, tmp_idata[i]);
if (retval) {
put_bh(bh);
return retval;
}
}
}
put_bh(bh);
ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
return 0;
}
static int free_ind_block(handle_t *handle, struct inode *inode)
{
int retval;
struct ext4_inode_info *ei = EXT4_I(inode);
if (ei->i_data[EXT4_IND_BLOCK])
ext4_free_blocks(handle, inode,
le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1);
if (ei->i_data[EXT4_DIND_BLOCK]) {
retval = free_dind_blocks(handle, inode,
ei->i_data[EXT4_DIND_BLOCK]);
if (retval)
return retval;
}
if (ei->i_data[EXT4_TIND_BLOCK]) {
retval = free_tind_blocks(handle, inode,
ei->i_data[EXT4_TIND_BLOCK]);
if (retval)
return retval;
}
return 0;
}
static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
struct inode *tmp_inode, int retval)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
retval = free_ind_block(handle, inode);
if (retval)
goto err_out;
/*
* One credit accounted for writing the
* i_data field of the original inode
*/
retval = ext4_journal_extend(handle, 1);
if (retval != 0) {
retval = ext4_journal_restart(handle, 1);
if (retval)
goto err_out;
}
/*
* We have the extent map built with the tmp inode.
* Now copy the i_data across
*/
ei->i_flags |= EXT4_EXTENTS_FL;
memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
/*
* Update i_blocks with the new blocks that got
* allocated while adding extents for extent index
* blocks.
*
* While converting to extents we need not
* update the original inode i_blocks for extent blocks
* via quota APIs. The quota update happened via tmp_inode already.
*/
spin_lock(&inode->i_lock);
inode->i_blocks += tmp_inode->i_blocks;
spin_unlock(&inode->i_lock);
ext4_mark_inode_dirty(handle, inode);
err_out:
return retval;
}
static int free_ext_idx(handle_t *handle, struct inode *inode,
struct ext4_extent_idx *ix)
{
int i, retval = 0;
ext4_fsblk_t block;
struct buffer_head *bh;
struct ext4_extent_header *eh;
block = idx_pblock(ix);
bh = sb_bread(inode->i_sb, block);
if (!bh)
return -EIO;
eh = (struct ext4_extent_header *)bh->b_data;
if (eh->eh_depth != 0) {
ix = EXT_FIRST_INDEX(eh);
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
retval = free_ext_idx(handle, inode, ix);
if (retval)
break;
}
}
put_bh(bh);
ext4_free_blocks(handle, inode, block, 1, 1);
return retval;
}
/*
* Free the extent meta data blocks only
*/
static int free_ext_block(handle_t *handle, struct inode *inode)
{
int i, retval = 0;
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
struct ext4_extent_idx *ix;
if (eh->eh_depth == 0)
/*
* No extra blocks allocated for extent meta data
*/
return 0;
ix = EXT_FIRST_INDEX(eh);
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
retval = free_ext_idx(handle, inode, ix);
if (retval)
return retval;
}
return retval;
}
int ext4_ext_migrate(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
handle_t *handle;
int retval = 0, i;
__le32 *i_data;
ext4_lblk_t blk_count = 0;
struct ext4_inode_info *ei;
struct inode *tmp_inode = NULL;
struct list_blocks_struct lb;
unsigned long max_entries;
if (!test_opt(inode->i_sb, EXTENTS))
/*
* if mounted with noextents we don't allow the migration
*/
return -EINVAL;
if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
return -EINVAL;
down_write(&EXT4_I(inode)->i_data_sem);
handle = ext4_journal_start(inode,
EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
+ 1);
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
goto err_out;
}
tmp_inode = ext4_new_inode(handle,
inode->i_sb->s_root->d_inode,
S_IFREG);
if (IS_ERR(tmp_inode)) {
retval = -ENOMEM;
ext4_journal_stop(handle);
tmp_inode = NULL;
goto err_out;
}
i_size_write(tmp_inode, i_size_read(inode));
/*
* We don't want the inode to be reclaimed
* if we got interrupted in between. We have
* this tmp inode carrying a reference to the
* data blocks of the original file. We set
* the i_nlink to zero at the last stage after
* switching the original file to extent format
*/
tmp_inode->i_nlink = 1;
ext4_ext_tree_init(handle, tmp_inode);
ext4_orphan_add(handle, tmp_inode);
ext4_journal_stop(handle);
ei = EXT4_I(inode);
i_data = ei->i_data;
memset(&lb, 0, sizeof(lb));
/* 32 bit block address 4 bytes */
max_entries = inode->i_sb->s_blocksize >> 2;
/*
* start with one credit accounted for
* superblock modification.
*
* For the tmp_inode we have already committed the
* transaction that created the inode. Later, as and
* when we add extents, we extend the journal
*/
handle = ext4_journal_start(inode, 1);
for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
if (i_data[i]) {
retval = update_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[i]),
blk_count, &lb);
if (retval)
goto err_out;
}
}
if (i_data[EXT4_IND_BLOCK]) {
retval = update_ind_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[EXT4_IND_BLOCK]),
&blk_count, &lb);
if (retval)
goto err_out;
} else
blk_count += max_entries;
if (i_data[EXT4_DIND_BLOCK]) {
retval = update_dind_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
&blk_count, &lb);
if (retval)
goto err_out;
} else
blk_count += max_entries * max_entries;
if (i_data[EXT4_TIND_BLOCK]) {
retval = update_tind_extent_range(handle, tmp_inode,
le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
&blk_count, &lb);
if (retval)
goto err_out;
}
/*
* Build the last extent
*/
retval = finish_range(handle, tmp_inode, &lb);
err_out:
/*
* We are either freeing extent information or indirect
* blocks. During this we touch superblock, group descriptor
* and block bitmap. Later we mark the tmp_inode dirty
* via ext4_ext_tree_init. So allocate a credit of 4
* We may update quota (user and group).
*
* FIXME!! we may be touching bitmaps in different block groups.
*/
if (ext4_journal_extend(handle,
4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0)
ext4_journal_restart(handle,
4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
if (retval)
/*
* Failure case delete the extent information with the
* tmp_inode
*/
free_ext_block(handle, tmp_inode);
else
retval = ext4_ext_swap_inode_data(handle, inode,
tmp_inode, retval);
/*
* Mark the tmp_inode as of size zero
*/
i_size_write(tmp_inode, 0);
/*
* set the i_blocks count to zero
* so that the ext4_delete_inode does the
* right job
*
* We don't need to take the i_lock because
* the inode is not visible to user space.
*/
tmp_inode->i_blocks = 0;
/* Reset the extent details */
ext4_ext_tree_init(handle, tmp_inode);
/*
* Set the i_nlink to zero so that
* generic_drop_inode really deletes the
* inode
*/
tmp_inode->i_nlink = 0;
ext4_journal_stop(handle);
up_write(&EXT4_I(inode)->i_data_sem);
if (tmp_inode)
iput(tmp_inode);
return retval;
}
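One observable effect (a tooling note, not part of the patch): after a successful migration the inode carries EXT4_EXTENTS_FL, which e2fsprogs that understand the flag report as 'e' in lsattr output.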
......@@ -51,7 +51,7 @@
static struct buffer_head *ext4_append(handle_t *handle,
struct inode *inode,
u32 *block, int *err)
ext4_lblk_t *block, int *err)
{
struct buffer_head *bh;
......@@ -144,8 +144,8 @@ struct dx_map_entry
u16 size;
};
static inline unsigned dx_get_block (struct dx_entry *entry);
static void dx_set_block (struct dx_entry *entry, unsigned value);
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
static inline unsigned dx_get_hash (struct dx_entry *entry);
static void dx_set_hash (struct dx_entry *entry, unsigned value);
static unsigned dx_get_count (struct dx_entry *entries);
......@@ -166,7 +166,8 @@ static void dx_sort_map(struct dx_map_entry *map, unsigned count);
static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to,
struct dx_map_entry *offsets, int count);
static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size);
static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
static void dx_insert_block(struct dx_frame *frame,
u32 hash, ext4_lblk_t block);
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
struct dx_frame *frame,
struct dx_frame *frames,
......@@ -181,12 +182,12 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
* Mask them off for now.
*/
static inline unsigned dx_get_block (struct dx_entry *entry)
static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
{
return le32_to_cpu(entry->block) & 0x00ffffff;
}
static inline void dx_set_block (struct dx_entry *entry, unsigned value)
static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
{
entry->block = cpu_to_le32(value);
}
......@@ -243,8 +244,8 @@ static void dx_show_index (char * label, struct dx_entry *entries)
int i, n = dx_get_count (entries);
printk("%s index ", label);
for (i = 0; i < n; i++) {
printk("%x->%u ", i? dx_get_hash(entries + i) :
0, dx_get_block(entries + i));
printk("%x->%lu ", i? dx_get_hash(entries + i) :
0, (unsigned long)dx_get_block(entries + i));
}
printk("\n");
}
......@@ -280,7 +281,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
space += EXT4_DIR_REC_LEN(de->name_len);
names++;
}
de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
de = ext4_next_entry(de);
}
printk("(%i)\n", names);
return (struct stats) { names, space, 1 };
......@@ -297,7 +298,8 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
printk("%i indexed blocks...\n", count);
for (i = 0; i < count; i++, entries++)
{
u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
ext4_lblk_t block = dx_get_block(entries);
ext4_lblk_t hash = i ? dx_get_hash(entries): 0;
u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
struct stats stats;
printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
......@@ -551,7 +553,8 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
*/
static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p)
{
return (struct ext4_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len));
return (struct ext4_dir_entry_2 *)((char *)p +
ext4_rec_len_from_disk(p->rec_len));
}
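
Throughout this file, raw le16_to_cpu()/cpu_to_le16() conversions of rec_len give way to ext4_rec_len_from_disk()/ext4_rec_len_to_disk(). The motivation is blocksizes up to 64KB: a 65536-byte record cannot be stored directly in the 16-bit on-disk field, so a sentinel encoding is needed. A sketch of the shape these helpers take, assuming a 0xffff sentinel (the authoritative definitions live in ext4_fs.h):

	#define EXT4_MAX_REC_LEN	((1 << 16) - 1)	/* assumed sentinel value */

	static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
	{
		unsigned len = le16_to_cpu(dlen);

		if (len == EXT4_MAX_REC_LEN)	/* record spans a full 64KB block */
			return 1 << 16;
		return len;
	}

	static inline __le16 ext4_rec_len_to_disk(unsigned len)
	{
		if (len == (1 << 16))
			return cpu_to_le16(EXT4_MAX_REC_LEN);
		else if (len > (1 << 16))
			BUG();
		return cpu_to_le16(len);
	}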
/*
......@@ -560,7 +563,7 @@ static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *
* into the tree. If there is an error it is returned in err.
*/
static int htree_dirblock_to_tree(struct file *dir_file,
struct inode *dir, int block,
struct inode *dir, ext4_lblk_t block,
struct dx_hash_info *hinfo,
__u32 start_hash, __u32 start_minor_hash)
{
......@@ -568,7 +571,8 @@ static int htree_dirblock_to_tree(struct file *dir_file,
struct ext4_dir_entry_2 *de, *top;
int err, count = 0;
dxtrace(printk("In htree dirblock_to_tree: block %d\n", block));
dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
(unsigned long)block));
if (!(bh = ext4_bread (NULL, dir, block, 0, &err)))
return err;
......@@ -620,9 +624,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
struct ext4_dir_entry_2 *de;
struct dx_frame frames[2], *frame;
struct inode *dir;
int block, err;
ext4_lblk_t block;
int count = 0;
int ret;
int ret, err;
__u32 hashval;
dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
......@@ -720,7 +724,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
cond_resched();
}
/* XXX: do we need to check rec_len == 0 case? -Chris */
de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
de = ext4_next_entry(de);
}
return count;
}
......@@ -752,7 +756,7 @@ static void dx_sort_map (struct dx_map_entry *map, unsigned count)
} while(more);
}
static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
{
struct dx_entry *entries = frame->entries;
struct dx_entry *old = frame->at, *new = old + 1;
......@@ -820,7 +824,7 @@ static inline int search_dirblock(struct buffer_head * bh,
return 1;
}
/* prevent looping on a bad block */
de_len = le16_to_cpu(de->rec_len);
de_len = ext4_rec_len_from_disk(de->rec_len);
if (de_len <= 0)
return -1;
offset += de_len;
......@@ -847,23 +851,20 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
struct super_block * sb;
struct buffer_head * bh_use[NAMEI_RA_SIZE];
struct buffer_head * bh, *ret = NULL;
unsigned long start, block, b;
ext4_lblk_t start, block, b;
int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
int ra_ptr = 0; /* Current index into readahead
buffer */
int num = 0;
int nblocks, i, err;
ext4_lblk_t nblocks;
int i, err;
struct inode *dir = dentry->d_parent->d_inode;
int namelen;
const u8 *name;
unsigned blocksize;
*res_dir = NULL;
sb = dir->i_sb;
blocksize = sb->s_blocksize;
namelen = dentry->d_name.len;
name = dentry->d_name.name;
if (namelen > EXT4_NAME_LEN)
return NULL;
if (is_dx(dir)) {
......@@ -914,7 +915,8 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
if (!buffer_uptodate(bh)) {
/* read error, skip block & hope for the best */
ext4_error(sb, __FUNCTION__, "reading directory #%lu "
"offset %lu", dir->i_ino, block);
"offset %lu", dir->i_ino,
(unsigned long)block);
brelse(bh);
goto next;
}
......@@ -961,7 +963,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
struct dx_frame frames[2], *frame;
struct ext4_dir_entry_2 *de, *top;
struct buffer_head *bh;
unsigned long block;
ext4_lblk_t block;
int retval;
int namelen = dentry->d_name.len;
const u8 *name = dentry->d_name.name;
......@@ -1128,7 +1130,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
rec_len = EXT4_DIR_REC_LEN(de->name_len);
memcpy (to, de, rec_len);
((struct ext4_dir_entry_2 *) to)->rec_len =
cpu_to_le16(rec_len);
ext4_rec_len_to_disk(rec_len);
de->inode = 0;
map++;
to += rec_len;
......@@ -1147,13 +1149,12 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
prev = to = de;
while ((char*)de < base + size) {
next = (struct ext4_dir_entry_2 *) ((char *) de +
le16_to_cpu(de->rec_len));
next = ext4_next_entry(de);
if (de->inode && de->name_len) {
rec_len = EXT4_DIR_REC_LEN(de->name_len);
if (de > to)
memmove(to, de, rec_len);
to->rec_len = cpu_to_le16(rec_len);
to->rec_len = ext4_rec_len_to_disk(rec_len);
prev = to;
to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
}
......@@ -1174,7 +1175,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
unsigned blocksize = dir->i_sb->s_blocksize;
unsigned count, continued;
struct buffer_head *bh2;
u32 newblock;
ext4_lblk_t newblock;
u32 hash2;
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
......@@ -1221,14 +1222,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
split = count - move;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk("Split block %i at %x, %i/%i\n",
dx_get_block(frame->at), hash2, split, count-split));
dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
(unsigned long)dx_get_block(frame->at),
hash2, split, count-split));
/* Fancy dance to stay within two buffers */
de2 = dx_move_dirents(data1, data2, map + split, count - split);
de = dx_pack_dirents(data1,blocksize);
de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
......@@ -1297,7 +1299,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
return -EEXIST;
}
nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = le16_to_cpu(de->rec_len);
rlen = ext4_rec_len_from_disk(de->rec_len);
if ((de->inode? rlen - nlen: rlen) >= reclen)
break;
de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
......@@ -1316,11 +1318,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
/* By now the buffer is marked for journaling */
nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = le16_to_cpu(de->rec_len);
rlen = ext4_rec_len_from_disk(de->rec_len);
if (de->inode) {
struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
de1->rec_len = cpu_to_le16(rlen - nlen);
de->rec_len = cpu_to_le16(nlen);
de1->rec_len = ext4_rec_len_to_disk(rlen - nlen);
de->rec_len = ext4_rec_len_to_disk(nlen);
de = de1;
}
de->file_type = EXT4_FT_UNKNOWN;
......@@ -1374,7 +1376,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
int retval;
unsigned blocksize;
struct dx_hash_info hinfo;
u32 block;
ext4_lblk_t block;
struct fake_dirent *fde;
blocksize = dir->i_sb->s_blocksize;
......@@ -1397,17 +1399,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
/* The 0th block becomes the root, move the dirents out */
fde = &root->dotdot;
de = (struct ext4_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len));
de = (struct ext4_dir_entry_2 *)((char *)fde +
ext4_rec_len_from_disk(fde->rec_len));
len = ((char *) root) + blocksize - (char *) de;
memcpy (data1, de, len);
de = (struct ext4_dir_entry_2 *) data1;
top = data1 + len;
while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top)
while ((char *)(de2 = ext4_next_entry(de)) < top)
de = de2;
de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
/* Initialize the root; the dot dirents already exist */
de = (struct ext4_dir_entry_2 *) (&root->dotdot);
de->rec_len = cpu_to_le16(blocksize - EXT4_DIR_REC_LEN(2));
de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2));
memset (&root->info, 0, sizeof(root->info));
root->info.info_length = sizeof(root->info);
root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
......@@ -1454,7 +1457,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
int retval;
int dx_fallback=0;
unsigned blocksize;
u32 block, blocks;
ext4_lblk_t block, blocks;
sb = dir->i_sb;
blocksize = sb->s_blocksize;
......@@ -1487,7 +1490,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
return retval;
de = (struct ext4_dir_entry_2 *) bh->b_data;
de->inode = 0;
de->rec_len = cpu_to_le16(blocksize);
de->rec_len = ext4_rec_len_to_disk(blocksize);
return add_dirent_to_buf(handle, dentry, inode, de, bh);
}
......@@ -1531,7 +1534,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
dx_get_count(entries), dx_get_limit(entries)));
/* Need to split index? */
if (dx_get_count(entries) == dx_get_limit(entries)) {
u32 newblock;
ext4_lblk_t newblock;
unsigned icount = dx_get_count(entries);
int levels = frame - frames;
struct dx_entry *entries2;
......@@ -1550,7 +1553,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
goto cleanup;
node2 = (struct dx_node *)(bh2->b_data);
entries2 = node2->entries;
node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize);
node2->fake.inode = 0;
BUFFER_TRACE(frame->bh, "get_write_access");
err = ext4_journal_get_write_access(handle, frame->bh);
......@@ -1648,9 +1651,9 @@ static int ext4_delete_entry (handle_t *handle,
BUFFER_TRACE(bh, "get_write_access");
ext4_journal_get_write_access(handle, bh);
if (pde)
pde->rec_len =
cpu_to_le16(le16_to_cpu(pde->rec_len) +
le16_to_cpu(de->rec_len));
pde->rec_len = ext4_rec_len_to_disk(
ext4_rec_len_from_disk(pde->rec_len) +
ext4_rec_len_from_disk(de->rec_len));
else
de->inode = 0;
dir->i_version++;
......@@ -1658,10 +1661,9 @@ static int ext4_delete_entry (handle_t *handle,
ext4_journal_dirty_metadata(handle, bh);
return 0;
}
i += le16_to_cpu(de->rec_len);
i += ext4_rec_len_from_disk(de->rec_len);
pde = de;
de = (struct ext4_dir_entry_2 *)
((char *) de + le16_to_cpu(de->rec_len));
de = ext4_next_entry(de);
}
return -ENOENT;
}
......@@ -1824,13 +1826,13 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
de = (struct ext4_dir_entry_2 *) dir_block->b_data;
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de->name_len));
de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
strcpy (de->name, ".");
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
de = (struct ext4_dir_entry_2 *)
((char *) de + le16_to_cpu(de->rec_len));
de = ext4_next_entry(de);
de->inode = cpu_to_le32(dir->i_ino);
de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1));
de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize -
EXT4_DIR_REC_LEN(1));
de->name_len = 2;
strcpy (de->name, "..");
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
......@@ -1882,8 +1884,7 @@ static int empty_dir (struct inode * inode)
return 1;
}
de = (struct ext4_dir_entry_2 *) bh->b_data;
de1 = (struct ext4_dir_entry_2 *)
((char *) de + le16_to_cpu(de->rec_len));
de1 = ext4_next_entry(de);
if (le32_to_cpu(de->inode) != inode->i_ino ||
!le32_to_cpu(de1->inode) ||
strcmp (".", de->name) ||
......@@ -1894,9 +1895,9 @@ static int empty_dir (struct inode * inode)
brelse (bh);
return 1;
}
offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
de = (struct ext4_dir_entry_2 *)
((char *) de1 + le16_to_cpu(de1->rec_len));
offset = ext4_rec_len_from_disk(de->rec_len) +
ext4_rec_len_from_disk(de1->rec_len);
de = ext4_next_entry(de1);
while (offset < inode->i_size ) {
if (!bh ||
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
......@@ -1925,9 +1926,8 @@ static int empty_dir (struct inode * inode)
brelse (bh);
return 0;
}
offset += le16_to_cpu(de->rec_len);
de = (struct ext4_dir_entry_2 *)
((char *) de + le16_to_cpu(de->rec_len));
offset += ext4_rec_len_from_disk(de->rec_len);
de = ext4_next_entry(de);
}
brelse (bh);
return 1;
......@@ -2282,8 +2282,7 @@ static int ext4_link (struct dentry * old_dentry,
}
#define PARENT_INO(buffer) \
((struct ext4_dir_entry_2 *) ((char *) buffer + \
le16_to_cpu(((struct ext4_dir_entry_2 *) buffer)->rec_len)))->inode
(ext4_next_entry((struct ext4_dir_entry_2 *)(buffer))->inode)
/*
* Anybody can rename anything with this: the permission checks are left to the
......
......@@ -28,7 +28,7 @@ static int verify_group_input(struct super_block *sb,
struct ext4_super_block *es = sbi->s_es;
ext4_fsblk_t start = ext4_blocks_count(es);
ext4_fsblk_t end = start + input->blocks_count;
unsigned group = input->group;
ext4_group_t group = input->group;
ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
unsigned overhead = ext4_bg_has_super(sb, group) ?
(1 + ext4_bg_num_gdb(sb, group) +
......@@ -206,7 +206,7 @@ static int setup_new_group_blocks(struct super_block *sb,
}
if (ext4_bg_has_super(sb, input->group)) {
ext4_debug("mark backup superblock %#04lx (+0)\n", start);
ext4_debug("mark backup superblock %#04llx (+0)\n", start);
ext4_set_bit(0, bh->b_data);
}
......@@ -215,7 +215,7 @@ static int setup_new_group_blocks(struct super_block *sb,
i < gdblocks; i++, block++, bit++) {
struct buffer_head *gdb;
ext4_debug("update backup group %#04lx (+%d)\n", block, bit);
ext4_debug("update backup group %#04llx (+%d)\n", block, bit);
if ((err = extend_or_restart_transaction(handle, 1, bh)))
goto exit_bh;
......@@ -243,7 +243,7 @@ static int setup_new_group_blocks(struct super_block *sb,
i < reserved_gdb; i++, block++, bit++) {
struct buffer_head *gdb;
ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit);
ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit);
if ((err = extend_or_restart_transaction(handle, 1, bh)))
goto exit_bh;
......@@ -256,10 +256,10 @@ static int setup_new_group_blocks(struct super_block *sb,
ext4_set_bit(bit, bh->b_data);
brelse(gdb);
}
ext4_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap,
ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
input->block_bitmap - start);
ext4_set_bit(input->block_bitmap - start, bh->b_data);
ext4_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap,
ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap,
input->inode_bitmap - start);
ext4_set_bit(input->inode_bitmap - start, bh->b_data);
......@@ -268,7 +268,7 @@ static int setup_new_group_blocks(struct super_block *sb,
i < sbi->s_itb_per_group; i++, bit++, block++) {
struct buffer_head *it;
ext4_debug("clear inode block %#04lx (+%d)\n", block, bit);
ext4_debug("clear inode block %#04llx (+%d)\n", block, bit);
if ((err = extend_or_restart_transaction(handle, 1, bh)))
goto exit_bh;
......@@ -291,7 +291,7 @@ static int setup_new_group_blocks(struct super_block *sb,
brelse(bh);
/* Mark unused entries in inode bitmap used */
ext4_debug("clear inode bitmap %#04x (+%ld)\n",
ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
input->inode_bitmap, input->inode_bitmap - start);
if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
err = PTR_ERR(bh);
......@@ -357,7 +357,7 @@ static int verify_reserved_gdb(struct super_block *sb,
struct buffer_head *primary)
{
const ext4_fsblk_t blk = primary->b_blocknr;
const unsigned long end = EXT4_SB(sb)->s_groups_count;
const ext4_group_t end = EXT4_SB(sb)->s_groups_count;
unsigned three = 1;
unsigned five = 5;
unsigned seven = 7;
......@@ -656,12 +656,12 @@ static void update_backups(struct super_block *sb,
int blk_off, char *data, int size)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
const unsigned long last = sbi->s_groups_count;
const ext4_group_t last = sbi->s_groups_count;
const int bpg = EXT4_BLOCKS_PER_GROUP(sb);
unsigned three = 1;
unsigned five = 5;
unsigned seven = 7;
unsigned group;
ext4_group_t group;
int rest = sb->s_blocksize - size;
handle_t *handle;
int err = 0, err2;
......@@ -716,7 +716,7 @@ static void update_backups(struct super_block *sb,
exit_err:
if (err) {
ext4_warning(sb, __FUNCTION__,
"can't update backup for group %d (err %d), "
"can't update backup for group %lu (err %d), "
"forcing fsck on next reboot", group, err);
sbi->s_mount_state &= ~EXT4_VALID_FS;
sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
......@@ -952,7 +952,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_fsblk_t n_blocks_count)
{
ext4_fsblk_t o_blocks_count;
unsigned long o_groups_count;
ext4_group_t o_groups_count;
ext4_grpblk_t last;
ext4_grpblk_t add;
struct buffer_head * bh;
......@@ -1054,7 +1054,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
sb->s_dirt = 1;
unlock_super(sb);
ext4_debug("freeing blocks %lu through %llu\n", o_blocks_count,
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add);
ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
......
This diff has been collapsed.
......@@ -480,7 +480,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ea_bdebug(bh, "refcount now=0; freeing");
if (ce)
mb_cache_entry_free(ce);
ext4_free_blocks(handle, inode, bh->b_blocknr, 1);
ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
get_bh(bh);
ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
} else {
......@@ -821,7 +821,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
new_bh = sb_getblk(sb, block);
if (!new_bh) {
getblk_failed:
ext4_free_blocks(handle, inode, block, 1);
ext4_free_blocks(handle, inode, block, 1, 1);
error = -EIO;
goto cleanup;
}
......
......@@ -1276,6 +1276,11 @@ void file_update_time(struct file *file)
sync_it = 1;
}
if (IS_I_VERSION(inode)) {
inode_inc_iversion(inode);
sync_it = 1;
}
if (sync_it)
mark_inode_dirty_sync(inode);
}
......
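The new IS_I_VERSION branch above bumps the inode version whenever a write updates the timestamps, which is what lets ext4's inode-version support (and NFSv4 change attributes) observe modifications. A sketch of the helper it relies on, assuming the 64-bit i_version patch in this series increments the counter under i_lock:

	/*
	 * Sketch, not copied verbatim from the tree: assumed helper from
	 * the 64-bit i_version patch.
	 */
	static inline void inode_inc_iversion(struct inode *inode)
	{
		spin_lock(&inode->i_lock);
		inode->i_version++;
		spin_unlock(&inode->i_lock);
	}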
......@@ -232,7 +232,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/
static int __process_buffer(journal_t *journal, struct journal_head *jh,
struct buffer_head **bhs, int *batch_count)
struct buffer_head **bhs, int *batch_count,
transaction_t *transaction)
{
struct buffer_head *bh = jh2bh(jh);
int ret = 0;
......@@ -250,6 +251,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
transaction_t *t = jh->b_transaction;
tid_t tid = t->t_tid;
transaction->t_chp_stats.cs_forced_to_close++;
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
jbd2_log_start_commit(journal, tid);
......@@ -279,6 +281,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
bhs[*batch_count] = bh;
__buffer_relink_io(jh);
jbd_unlock_bh_state(bh);
transaction->t_chp_stats.cs_written++;
(*batch_count)++;
if (*batch_count == NR_BATCH) {
spin_unlock(&journal->j_list_lock);
......@@ -322,6 +325,8 @@ int jbd2_log_do_checkpoint(journal_t *journal)
if (!journal->j_checkpoint_transactions)
goto out;
transaction = journal->j_checkpoint_transactions;
if (transaction->t_chp_stats.cs_chp_time == 0)
transaction->t_chp_stats.cs_chp_time = jiffies;
this_tid = transaction->t_tid;
restart:
/*
......@@ -346,7 +351,8 @@ int jbd2_log_do_checkpoint(journal_t *journal)
retry = 1;
break;
}
retry = __process_buffer(journal, jh, bhs,&batch_count);
retry = __process_buffer(journal, jh, bhs, &batch_count,
transaction);
if (!retry && lock_need_resched(&journal->j_list_lock)){
spin_unlock(&journal->j_list_lock);
retry = 1;
......@@ -602,15 +608,15 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
/*
* There is one special case to worry about: if we have just pulled the
* buffer off a committing transaction's forget list, then even if the
* checkpoint list is empty, the transaction obviously cannot be
* dropped!
	 * buffer off a running or committing transaction's checkpoint list,
* then even if the checkpoint list is empty, the transaction obviously
* cannot be dropped!
*
* The locking here around j_committing_transaction is a bit sleazy.
* The locking here around t_state is a bit sleazy.
* See the comment at the end of jbd2_journal_commit_transaction().
*/
if (transaction == journal->j_committing_transaction) {
JBUFFER_TRACE(jh, "belongs to committing transaction");
if (transaction->t_state != T_FINISHED) {
JBUFFER_TRACE(jh, "belongs to running/committing transaction");
goto out;
}
......
......@@ -20,6 +20,8 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/jiffies.h>
#include <linux/crc32.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
......@@ -92,19 +94,23 @@ static int inverted_lock(journal_t *journal, struct buffer_head *bh)
return 1;
}
/* Done it all: now write the commit record. We should have
/*
* Done it all: now submit the commit record. We should have
* cleaned up our previous buffers by now, so if we are in abort
* mode we can now just skip the rest of the journal write
* entirely.
*
* Returns 1 if the journal needs to be aborted or 0 on success
*/
static int journal_write_commit_record(journal_t *journal,
transaction_t *commit_transaction)
static int journal_submit_commit_record(journal_t *journal,
transaction_t *commit_transaction,
struct buffer_head **cbh,
__u32 crc32_sum)
{
struct journal_head *descriptor;
struct commit_header *tmp;
struct buffer_head *bh;
int i, ret;
int ret;
int barrier_done = 0;
if (is_journal_aborted(journal))
......@@ -116,21 +122,33 @@ static int journal_write_commit_record(journal_t *journal,
bh = jh2bh(descriptor);
/* AKPM: buglet - add `i' to tmp! */
for (i = 0; i < bh->b_size; i += 512) {
journal_header_t *tmp = (journal_header_t*)bh->b_data;
tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
tmp = (struct commit_header *)bh->b_data;
tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
if (JBD2_HAS_COMPAT_FEATURE(journal,
JBD2_FEATURE_COMPAT_CHECKSUM)) {
tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
}
JBUFFER_TRACE(descriptor, "write commit block");
JBUFFER_TRACE(descriptor, "submit commit block");
lock_buffer(bh);
set_buffer_dirty(bh);
if (journal->j_flags & JBD2_BARRIER) {
set_buffer_uptodate(bh);
bh->b_end_io = journal_end_buffer_io_sync;
if (journal->j_flags & JBD2_BARRIER &&
!JBD2_HAS_COMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
set_buffer_ordered(bh);
barrier_done = 1;
}
ret = sync_dirty_buffer(bh);
ret = submit_bh(WRITE, bh);
/* is it possible for another commit to fail at roughly
* the same time as this one? If so, we don't want to
* trust the barrier flag in the super, but instead want
......@@ -151,14 +169,72 @@ static int journal_write_commit_record(journal_t *journal,
clear_buffer_ordered(bh);
set_buffer_uptodate(bh);
set_buffer_dirty(bh);
ret = sync_dirty_buffer(bh);
ret = submit_bh(WRITE, bh);
}
put_bh(bh); /* One for getblk() */
jbd2_journal_put_journal_head(descriptor);
*cbh = bh;
return ret;
}
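
The commit block is now laid out as a struct commit_header rather than a bare journal_header_t, so the checksum type, size and value set above travel inside the commit record itself. The layout this code assumes is roughly the following (the first three fields deliberately match journal_header_t; exact padding and JBD2_CHECKSUM_BYTES are per include/linux/jbd2.h):

	struct commit_header {
		__be32		h_magic;
		__be32		h_blocktype;
		__be32		h_sequence;
		unsigned char	h_chksum_type;
		unsigned char	h_chksum_size;
		unsigned char	h_padding[2];
		__be32		h_chksum[JBD2_CHECKSUM_BYTES];
	};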
/*
* This function along with journal_submit_commit_record
 * allows the commit record to be written asynchronously.
*/
static int journal_wait_on_commit_record(struct buffer_head *bh)
{
int ret = 0;
clear_buffer_dirty(bh);
wait_on_buffer(bh);
if (unlikely(!buffer_uptodate(bh)))
ret = -EIO;
put_bh(bh); /* One for getblk() */
jbd2_journal_put_journal_head(bh2jh(bh));
return (ret == -EIO);
return ret;
}
/*
* Wait for all submitted IO to complete.
*/
static int journal_wait_on_locked_list(journal_t *journal,
transaction_t *commit_transaction)
{
int ret = 0;
struct journal_head *jh;
while (commit_transaction->t_locked_list) {
struct buffer_head *bh;
jh = commit_transaction->t_locked_list->b_tprev;
bh = jh2bh(jh);
get_bh(bh);
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
if (unlikely(!buffer_uptodate(bh)))
ret = -EIO;
spin_lock(&journal->j_list_lock);
}
if (!inverted_lock(journal, bh)) {
put_bh(bh);
spin_lock(&journal->j_list_lock);
continue;
}
if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
__jbd2_journal_unfile_buffer(jh);
jbd_unlock_bh_state(bh);
jbd2_journal_remove_journal_head(bh);
put_bh(bh);
} else {
jbd_unlock_bh_state(bh);
}
put_bh(bh);
cond_resched_lock(&journal->j_list_lock);
}
return ret;
}
static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
{
int i;
......@@ -274,7 +350,21 @@ static void journal_submit_data_buffers(journal_t *journal,
journal_do_submit_data(wbuf, bufs);
}
static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
{
struct page *page = bh->b_page;
char *addr;
__u32 checksum;
addr = kmap_atomic(page, KM_USER0);
checksum = crc32_be(crc32_sum,
(void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
kunmap_atomic(addr, KM_USER0);
return checksum;
}
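
jbd2_checksum_data() folds a single buffer into a running CRC32. The commit path chains it across every block it logs (see the start_journal_io loop further down), so submission order matters and the final crc32_sum covers the whole transaction. Illustrative only:

	/*
	 * Illustrative chaining, not kernel code: seed with ~0 and fold
	 * each submitted block into the running sum, in order.
	 */
	__u32 crc32_sum = ~0;
	for (i = 0; i < bufs; i++)
		crc32_sum = jbd2_checksum_data(crc32_sum, wbuf[i]);
	/* crc32_sum now covers wbuf[0..bufs-1] in submission order */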
static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
unsigned long long block)
{
tag->t_blocknr = cpu_to_be32(block & (u32)~0);
......@@ -290,6 +380,7 @@ static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
*/
void jbd2_journal_commit_transaction(journal_t *journal)
{
struct transaction_stats_s stats;
transaction_t *commit_transaction;
struct journal_head *jh, *new_jh, *descriptor;
struct buffer_head **wbuf = journal->j_wbuf;
......@@ -305,6 +396,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
int tag_flag;
int i;
int tag_bytes = journal_tag_bytes(journal);
struct buffer_head *cbh = NULL; /* For transactional checksums */
__u32 crc32_sum = ~0;
/*
* First job: lock down the current transaction and wait for
......@@ -337,6 +430,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
spin_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED;
stats.u.run.rs_wait = commit_transaction->t_max_wait;
stats.u.run.rs_locked = jiffies;
stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
stats.u.run.rs_locked);
spin_lock(&commit_transaction->t_handle_lock);
while (commit_transaction->t_updates) {
DEFINE_WAIT(wait);
......@@ -407,6 +505,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
jbd2_journal_switch_revoke_table(journal);
stats.u.run.rs_flushing = jiffies;
stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
stats.u.run.rs_flushing);
commit_transaction->t_state = T_FLUSH;
journal->j_committing_transaction = commit_transaction;
journal->j_running_transaction = NULL;
......@@ -440,38 +542,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
journal_submit_data_buffers(journal, commit_transaction);
/*
* Wait for all previously submitted IO to complete.
* Wait for all previously submitted IO to complete if commit
* record is to be written synchronously.
*/
spin_lock(&journal->j_list_lock);
while (commit_transaction->t_locked_list) {
struct buffer_head *bh;
if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
err = journal_wait_on_locked_list(journal,
commit_transaction);
jh = commit_transaction->t_locked_list->b_tprev;
bh = jh2bh(jh);
get_bh(bh);
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
if (unlikely(!buffer_uptodate(bh)))
err = -EIO;
spin_lock(&journal->j_list_lock);
}
if (!inverted_lock(journal, bh)) {
put_bh(bh);
spin_lock(&journal->j_list_lock);
continue;
}
if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
__jbd2_journal_unfile_buffer(jh);
jbd_unlock_bh_state(bh);
jbd2_journal_remove_journal_head(bh);
put_bh(bh);
} else {
jbd_unlock_bh_state(bh);
}
put_bh(bh);
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
if (err)
......@@ -498,6 +577,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
commit_transaction->t_state = T_COMMIT;
stats.u.run.rs_logging = jiffies;
stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
stats.u.run.rs_logging);
stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
stats.u.run.rs_blocks_logged = 0;
descriptor = NULL;
bufs = 0;
while (commit_transaction->t_buffers) {
......@@ -639,6 +724,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
start_journal_io:
for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i];
/*
* Compute checksum.
*/
if (JBD2_HAS_COMPAT_FEATURE(journal,
JBD2_FEATURE_COMPAT_CHECKSUM)) {
crc32_sum =
jbd2_checksum_data(crc32_sum, bh);
}
lock_buffer(bh);
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
......@@ -646,6 +740,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
submit_bh(WRITE, bh);
}
cond_resched();
stats.u.run.rs_blocks_logged += bufs;
/* Force a new descriptor to be generated next
time round the loop. */
......@@ -654,6 +749,23 @@ void jbd2_journal_commit_transaction(journal_t *journal)
}
}
/* Done it all: now write the commit record asynchronously. */
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum);
if (err)
__jbd2_journal_abort_hard(journal);
spin_lock(&journal->j_list_lock);
err = journal_wait_on_locked_list(journal,
commit_transaction);
spin_unlock(&journal->j_list_lock);
if (err)
__jbd2_journal_abort_hard(journal);
}
/* Lo and behold: we have just managed to send a transaction to
the log. Before we can commit it, wait for the IO so far to
complete. Control buffers being written are on the
......@@ -753,8 +865,14 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(3, "JBD: commit phase 6\n");
if (journal_write_commit_record(journal, commit_transaction))
err = -EIO;
if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum);
if (err)
__jbd2_journal_abort_hard(journal);
}
err = journal_wait_on_commit_record(cbh);
if (err)
jbd2_journal_abort(journal, err);
......@@ -816,6 +934,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
cp_transaction = jh->b_cp_transaction;
if (cp_transaction) {
JBUFFER_TRACE(jh, "remove from old cp transaction");
cp_transaction->t_chp_stats.cs_dropped++;
__jbd2_journal_remove_checkpoint(jh);
}
......@@ -867,10 +986,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
}
spin_unlock(&journal->j_list_lock);
/*
* This is a bit sleazy. We borrow j_list_lock to protect
* journal->j_committing_transaction in __jbd2_journal_remove_checkpoint.
* Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but
* it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint
* This is a bit sleazy. We use j_list_lock to protect transition
* of a transaction into T_FINISHED state and calling
* __jbd2_journal_drop_transaction(). Otherwise we could race with
* other checkpointing code processing the transaction...
*/
spin_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock);
......@@ -890,6 +1009,36 @@ void jbd2_journal_commit_transaction(journal_t *journal)
J_ASSERT(commit_transaction->t_state == T_COMMIT);
commit_transaction->t_start = jiffies;
stats.u.run.rs_logging = jbd2_time_diff(stats.u.run.rs_logging,
commit_transaction->t_start);
/*
* File the transaction for history
*/
stats.ts_type = JBD2_STATS_RUN;
stats.ts_tid = commit_transaction->t_tid;
stats.u.run.rs_handle_count = commit_transaction->t_handle_count;
spin_lock(&journal->j_history_lock);
memcpy(journal->j_history + journal->j_history_cur, &stats,
sizeof(stats));
if (++journal->j_history_cur == journal->j_history_max)
journal->j_history_cur = 0;
/*
* Calculate overall stats
*/
journal->j_stats.ts_tid++;
journal->j_stats.u.run.rs_wait += stats.u.run.rs_wait;
journal->j_stats.u.run.rs_running += stats.u.run.rs_running;
journal->j_stats.u.run.rs_locked += stats.u.run.rs_locked;
journal->j_stats.u.run.rs_flushing += stats.u.run.rs_flushing;
journal->j_stats.u.run.rs_logging += stats.u.run.rs_logging;
journal->j_stats.u.run.rs_handle_count += stats.u.run.rs_handle_count;
journal->j_stats.u.run.rs_blocks += stats.u.run.rs_blocks;
journal->j_stats.u.run.rs_blocks_logged += stats.u.run.rs_blocks_logged;
spin_unlock(&journal->j_history_lock);
commit_transaction->t_state = T_FINISHED;
J_ASSERT(commit_transaction == journal->j_committing_transaction);
journal->j_commit_sequence = commit_transaction->t_tid;
......
This diff has been collapsed. (2 files)
......@@ -171,13 +171,15 @@ int __init jbd2_journal_init_revoke_caches(void)
{
jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
sizeof(struct jbd2_revoke_record_s),
0, SLAB_HWCACHE_ALIGN, NULL);
0,
SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
NULL);
if (jbd2_revoke_record_cache == 0)
return -ENOMEM;
jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
sizeof(struct jbd2_revoke_table_s),
0, 0, NULL);
0, SLAB_TEMPORARY, NULL);
if (jbd2_revoke_table_cache == 0) {
kmem_cache_destroy(jbd2_revoke_record_cache);
jbd2_revoke_record_cache = NULL;
......
This diff has been collapsed.
......@@ -446,6 +446,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
}
return seg;
}
EXPORT_SYMBOL(iov_shorten);
ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
......
......@@ -310,6 +310,8 @@ static inline int constant_fls(int x)
_find_first_zero_bit_le(p,sz)
#define ext2_find_next_zero_bit(p,sz,off) \
_find_next_zero_bit_le(p,sz,off)
#define ext2_find_next_bit(p, sz, off) \
_find_next_bit_le(p, sz, off)
/*
* Minix is defined to use little-endian byte ordering.
......
......@@ -14,5 +14,7 @@
generic_find_first_zero_le_bit((unsigned long *)(addr), (size))
#define ext2_find_next_zero_bit(addr, size, off) \
generic_find_next_zero_le_bit((unsigned long *)(addr), (size), (off))
#define ext2_find_next_bit(addr, size, off) \
generic_find_next_le_bit((unsigned long *)(addr), (size), (off))
#endif /* _ASM_GENERIC_BITOPS_EXT2_NON_ATOMIC_H_ */
......@@ -20,6 +20,8 @@
#define generic___test_and_clear_le_bit(nr, addr) __test_and_clear_bit(nr, addr)
#define generic_find_next_zero_le_bit(addr, size, offset) find_next_zero_bit(addr, size, offset)
#define generic_find_next_le_bit(addr, size, offset) \
find_next_bit(addr, size, offset)
#elif defined(__BIG_ENDIAN)
......@@ -42,6 +44,8 @@
extern unsigned long generic_find_next_zero_le_bit(const unsigned long *addr,
unsigned long size, unsigned long offset);
extern unsigned long generic_find_next_le_bit(const unsigned long *addr,
unsigned long size, unsigned long offset);
#else
#error "Please fix <asm/byteorder.h>"
......
......@@ -410,6 +410,8 @@ static inline int ext2_find_next_zero_bit(const void *vaddr, unsigned size,
res = ext2_find_first_zero_bit (p, size - 32 * (p - addr));
return (p - addr) * 32 + res;
}
#define ext2_find_next_bit(addr, size, off) \
generic_find_next_le_bit((unsigned long *)(addr), (size), (off))
#endif /* __KERNEL__ */
......
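These per-architecture ext2_find_next_bit() definitions give the multiblock allocator a portable way to walk set bits in little-endian on-disk bitmaps; ext4 wraps them as ext4_find_next_bit() per the merge summary. A hypothetical scan loop, where bitmap, nbits and use_block() are illustrative names:

	unsigned long bit = ext2_find_next_bit(bitmap, nbits, 0);
	while (bit < nbits) {
		use_block(bit);		/* hypothetical consumer of each set bit */
		bit = ext2_find_next_bit(bitmap, nbits, bit + 1);
	}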
This diff has been collapsed. (11 files)