提交 d756d10e 编写于 作者: L Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: extent macros cleanup
  Fix compilation with EXT_DEBUG, also fix leXX_to_cpu conversions.
  ext4: remove extra IS_RDONLY() check
  ext4: Use is_power_of_2()
  Use zero_user_page() in ext4 where possible
  ext4: Remove 65000 subdirectory limit
  ext4: Expand extra_inodes space per the s_{want,min}_extra_isize fields 
  ext4: Add nanosecond timestamps
  jbd2: Move jbd2-debug file to debugfs
  jbd2: Fix CONFIG_JBD_DEBUG ifdef to be CONFIG_JBD2_DEBUG
  ext4: Set the journal JBD2_FEATURE_INCOMPAT_64BIT on large devices
  ext4: Make extents code sanely handle on-disk corruption
  ext4: copy i_flags to inode flags on write
  ext4: Enable extents by default
  Change on-disk format to support 2^15 uninitialized extents
  write support for preallocated blocks
  fallocate support in ext4
  sys_fallocate() implementation on i386, x86_64 and powerpc
......@@ -323,3 +323,4 @@ ENTRY(sys_call_table)
.long sys_signalfd
.long sys_timerfd
.long sys_eventfd
.long sys_fallocate
......@@ -773,6 +773,13 @@ asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
return sys_truncate(path, (high << 32) | low);
}
asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
u32 lenhi, u32 lenlo)
{
return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
((loff_t)lenhi << 32) | lenlo);
}
asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high,
unsigned long low)
{
......
......@@ -719,4 +719,5 @@ ia32_sys_call_table:
.quad compat_sys_signalfd
.quad compat_sys_timerfd
.quad sys_eventfd
.quad sys32_fallocate
ia32_syscall_end:
......@@ -879,3 +879,11 @@ asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
len, advice);
}
asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
unsigned offset_hi, unsigned len_lo,
unsigned len_hi)
{
return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
((u64)len_hi << 32) | len_lo);
}
......@@ -251,7 +251,7 @@ config JBD2
config JBD2_DEBUG
bool "JBD2 (ext4dev/ext4) debugging support"
depends on JBD2
depends on JBD2 && DEBUG_FS
help
If you are using the ext4dev/ext4 journaled file system (or
potentially any other filesystem/device using JBD2), this option
......@@ -260,10 +260,10 @@ config JBD2_DEBUG
By default, the debugging output will be turned off.
If you select Y here, then you will be able to turn on debugging
with "echo N > /proc/sys/fs/jbd2-debug", where N is a number between
1 and 5. The higher the number, the more debugging output is
generated. To turn debugging off again, do
"echo 0 > /proc/sys/fs/jbd2-debug".
with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a
number between 1 and 5. The higher the number, the more debugging
output is generated. To turn debugging off again, do
"echo 0 > /sys/kernel/debug/jbd2/jbd2-debug".
config FS_MBCACHE
# Meta block cache for Extended Attributes (ext2/ext3/ext4)
......
......@@ -517,7 +517,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
/*
* An HJ special. This is expensive...
*/
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
jbd_unlock_bh_state(bitmap_bh);
{
struct buffer_head *debug_bh;
......@@ -1597,7 +1597,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
performed_allocation = 1;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
{
struct buffer_head *debug_bh;
......
此差异已折叠。
......@@ -134,5 +134,6 @@ const struct inode_operations ext4_file_inode_operations = {
.removexattr = generic_removexattr,
#endif
.permission = ext4_permission,
.fallocate = ext4_fallocate,
};
......@@ -563,7 +563,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
inode->i_ino = ino;
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
ext4_current_time(inode);
memset(ei->i_data, 0, sizeof(ei->i_data));
ei->i_dir_start_lookup = 0;
......@@ -595,9 +596,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
spin_unlock(&sbi->s_next_gen_lock);
ei->i_state = EXT4_STATE_NEW;
ei->i_extra_isize =
(EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ?
sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0;
ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
......
......@@ -726,7 +726,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
/* We are done with atomic stuff, now do the rest of housekeeping */
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
/* had we spliced it onto indirect block? */
......@@ -1766,7 +1766,6 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
struct inode *inode = mapping->host;
struct buffer_head *bh;
int err = 0;
void *kaddr;
blocksize = inode->i_sb->s_blocksize;
length = blocksize - (offset & (blocksize - 1));
......@@ -1778,10 +1777,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
*/
if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
ext4_should_writeback_data(inode) && PageUptodate(page)) {
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + offset, 0, length);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
zero_user_page(page, offset, length, KM_USER0);
set_page_dirty(page);
goto unlock;
}
......@@ -1834,10 +1830,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
goto unlock;
}
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + offset, 0, length);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
zero_user_page(page, offset, length, KM_USER0);
BUFFER_TRACE(bh, "zeroed end of block");
......@@ -2375,7 +2368,7 @@ void ext4_truncate(struct inode *inode)
ext4_discard_reservation(inode);
mutex_unlock(&ei->truncate_mutex);
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
/*
......@@ -2583,6 +2576,25 @@ void ext4_set_inode_flags(struct inode *inode)
inode->i_flags |= S_DIRSYNC;
}
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
void ext4_get_inode_flags(struct ext4_inode_info *ei)
{
unsigned int flags = ei->vfs_inode.i_flags;
ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
if (flags & S_SYNC)
ei->i_flags |= EXT4_SYNC_FL;
if (flags & S_APPEND)
ei->i_flags |= EXT4_APPEND_FL;
if (flags & S_IMMUTABLE)
ei->i_flags |= EXT4_IMMUTABLE_FL;
if (flags & S_NOATIME)
ei->i_flags |= EXT4_NOATIME_FL;
if (flags & S_DIRSYNC)
ei->i_flags |= EXT4_DIRSYNC_FL;
}
void ext4_read_inode(struct inode * inode)
{
struct ext4_iloc iloc;
......@@ -2610,10 +2622,6 @@ void ext4_read_inode(struct inode * inode)
}
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
inode->i_size = le32_to_cpu(raw_inode->i_size);
inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
ei->i_state = 0;
ei->i_dir_start_lookup = 0;
......@@ -2691,6 +2699,11 @@ void ext4_read_inode(struct inode * inode)
} else
ei->i_extra_isize = 0;
EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
......@@ -2744,6 +2757,7 @@ static int ext4_do_update_inode(handle_t *handle,
if (ei->i_state & EXT4_STATE_NEW)
memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
ext4_get_inode_flags(ei);
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
if(!(test_opt(inode->i_sb, NO_UID32))) {
raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
......@@ -2771,9 +2785,12 @@ static int ext4_do_update_inode(handle_t *handle,
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le32(ei->i_disksize);
raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags);
......@@ -3081,6 +3098,39 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
return err;
}
/*
* Expand an inode by new_extra_isize bytes.
* Returns 0 on success or negative error number on failure.
*/
int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize,
struct ext4_iloc iloc, handle_t *handle)
{
struct ext4_inode *raw_inode;
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_entry *entry;
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
return 0;
raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
/* No extended attributes present */
if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) ||
header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
new_extra_isize);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
return 0;
}
/* try to expand with EAs present */
return ext4_expand_extra_isize_ea(inode, new_extra_isize,
raw_inode, handle);
}
/*
* What we do here is to mark the in-core inode as clean with respect to inode
* dirtiness (it may still be data-dirty).
......@@ -3105,10 +3155,38 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
{
struct ext4_iloc iloc;
int err;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
static unsigned int mnt_count;
int err, ret;
might_sleep();
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
!(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) {
/*
* We need extra buffer credits since we may write into EA block
* with this same handle. If journal_extend fails, then it will
* only result in a minor loss of functionality for that inode.
* If this is felt to be critical, then e2fsck should be run to
* force a large enough s_min_extra_isize.
*/
if ((jbd2_journal_extend(handle,
EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
ret = ext4_expand_extra_isize(inode,
sbi->s_want_extra_isize,
iloc, handle);
if (ret) {
EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
if (mnt_count != sbi->s_es->s_mnt_count) {
ext4_warning(inode->i_sb, __FUNCTION__,
"Unable to expand inode %lu. Delete"
" some EAs or run e2fsck.",
inode->i_ino);
mnt_count = sbi->s_es->s_mnt_count;
}
}
}
}
if (!err)
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
return err;
......@@ -3197,7 +3275,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
*/
journal = EXT4_JOURNAL(inode);
if (is_journal_aborted(journal) || IS_RDONLY(inode))
if (is_journal_aborted(journal))
return -EROFS;
jbd2_journal_lock_updates(journal);
......
......@@ -28,6 +28,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
switch (cmd) {
case EXT4_IOC_GETFLAGS:
ext4_get_inode_flags(ei);
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
return put_user(flags, (int __user *) arg);
case EXT4_IOC_SETFLAGS: {
......@@ -96,7 +97,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
ei->i_flags = flags;
ext4_set_inode_flags(inode);
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
flags_err:
......@@ -133,14 +134,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
return PTR_ERR(handle);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err == 0) {
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
inode->i_generation = generation;
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}
ext4_journal_stop(handle);
return err;
}
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
case EXT4_IOC_WAIT_FOR_READONLY:
/*
* This is racy - by the time we're woken up and running,
......@@ -282,7 +283,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC32_SETVERSION_OLD:
cmd = EXT4_IOC_SETVERSION_OLD;
break;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
case EXT4_IOC32_WAIT_FOR_READONLY:
cmd = EXT4_IOC_WAIT_FOR_READONLY;
break;
......
......@@ -1295,7 +1295,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
* happen is that the times are slightly out of date
* and/or different from the directory change time.
*/
dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
dir->i_version++;
ext4_mark_inode_dirty(handle, dir);
......@@ -1629,6 +1629,35 @@ static int ext4_delete_entry (handle_t *handle,
return -ENOENT;
}
/*
* DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
* since this indicates that nlinks count was previously 1.
*/
static void ext4_inc_count(handle_t *handle, struct inode *inode)
{
inc_nlink(inode);
if (is_dx(inode) && inode->i_nlink > 1) {
/* limit is 16-bit i_links_count */
if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
inode->i_nlink = 1;
EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
}
}
}
/*
* If a directory had nlink == 1, then we should let it be 1. This indicates
* directory has >EXT4_LINK_MAX subdirs.
*/
static void ext4_dec_count(handle_t *handle, struct inode *inode)
{
drop_nlink(inode);
if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0)
inc_nlink(inode);
}
static int ext4_add_nondir(handle_t *handle,
struct dentry *dentry, struct inode *inode)
{
......@@ -1725,7 +1754,7 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
struct ext4_dir_entry_2 * de;
int err, retries = 0;
if (dir->i_nlink >= EXT4_LINK_MAX)
if (EXT4_DIR_LINK_MAX(dir))
return -EMLINK;
retry:
......@@ -1748,7 +1777,7 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
dir_block = ext4_bread (handle, inode, 0, 1, &err);
if (!dir_block) {
drop_nlink(inode); /* is this nlink == 0? */
ext4_dec_count(handle, inode); /* is this nlink == 0? */
ext4_mark_inode_dirty(handle, inode);
iput (inode);
goto out_stop;
......@@ -1780,7 +1809,7 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
iput (inode);
goto out_stop;
}
inc_nlink(dir);
ext4_inc_count(handle, dir);
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
d_instantiate(dentry, inode);
......@@ -2045,9 +2074,9 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto end_rmdir;
if (inode->i_nlink != 2)
if (!EXT4_DIR_LINK_EMPTY(inode))
ext4_warning (inode->i_sb, "ext4_rmdir",
"empty directory has nlink!=2 (%d)",
"empty directory has too many links (%d)",
inode->i_nlink);
inode->i_version++;
clear_nlink(inode);
......@@ -2056,9 +2085,9 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
* recovery. */
inode->i_size = 0;
ext4_orphan_add(handle, inode);
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
drop_nlink(dir);
ext4_dec_count(handle, dir);
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
......@@ -2106,13 +2135,13 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto end_unlink;
dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
drop_nlink(inode);
ext4_dec_count(handle, inode);
if (!inode->i_nlink)
ext4_orphan_add(handle, inode);
inode->i_ctime = dir->i_ctime;
inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
retval = 0;
......@@ -2159,7 +2188,7 @@ static int ext4_symlink (struct inode * dir,
err = __page_symlink(inode, symname, l,
mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
if (err) {
drop_nlink(inode);
ext4_dec_count(handle, inode);
ext4_mark_inode_dirty(handle, inode);
iput (inode);
goto out_stop;
......@@ -2185,8 +2214,9 @@ static int ext4_link (struct dentry * old_dentry,
struct inode *inode = old_dentry->d_inode;
int err, retries = 0;
if (inode->i_nlink >= EXT4_LINK_MAX)
if (EXT4_DIR_LINK_MAX(inode))
return -EMLINK;
/*
* Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
* otherwise has the potential to corrupt the orphan inode list.
......@@ -2203,8 +2233,8 @@ static int ext4_link (struct dentry * old_dentry,
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
inode->i_ctime = CURRENT_TIME_SEC;
inc_nlink(inode);
inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
atomic_inc(&inode->i_count);
err = ext4_add_nondir(handle, dentry, inode);
......@@ -2305,7 +2335,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
old_inode->i_ctime = CURRENT_TIME_SEC;
old_inode->i_ctime = ext4_current_time(old_inode);
ext4_mark_inode_dirty(handle, old_inode);
/*
......@@ -2337,10 +2367,10 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
}
if (new_inode) {
drop_nlink(new_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
ext4_dec_count(handle, new_inode);
new_inode->i_ctime = ext4_current_time(new_inode);
}
old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
ext4_update_dx_flag(old_dir);
if (dir_bh) {
BUFFER_TRACE(dir_bh, "get_write_access");
......@@ -2348,11 +2378,13 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata");
ext4_journal_dirty_metadata(handle, dir_bh);
drop_nlink(old_dir);
ext4_dec_count(handle, old_dir);
if (new_inode) {
drop_nlink(new_inode);
/* checked empty_dir above, can't have another parent,
* ext3_dec_count() won't work for many-linked dirs */
new_inode->i_nlink = 0;
} else {
inc_nlink(new_dir);
ext4_inc_count(handle, new_dir);
ext4_update_dx_flag(new_dir);
ext4_mark_inode_dirty(handle, new_dir);
}
......
......@@ -36,6 +36,7 @@
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include <linux/log2.h>
#include <asm/uaccess.h>
......@@ -734,7 +735,7 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
Opt_grpquota, Opt_extents,
Opt_grpquota, Opt_extents, Opt_noextents,
};
static match_table_t tokens = {
......@@ -785,6 +786,7 @@ static match_table_t tokens = {
{Opt_usrquota, "usrquota"},
{Opt_barrier, "barrier=%u"},
{Opt_extents, "extents"},
{Opt_noextents, "noextents"},
{Opt_err, NULL},
{Opt_resize, "resize"},
};
......@@ -1120,6 +1122,9 @@ static int parse_options (char *options, struct super_block *sb,
case Opt_extents:
set_opt (sbi->s_mount_opt, EXTENTS);
break;
case Opt_noextents:
clear_opt (sbi->s_mount_opt, EXTENTS);
break;
default:
printk (KERN_ERR
"EXT4-fs: Unrecognized mount option \"%s\" "
......@@ -1551,6 +1556,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, RESERVATION);
/*
* turn on extents feature by default in ext4 filesystem
* User -o noextents to turn it off
*/
set_opt(sbi->s_mount_opt, EXTENTS);
if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
NULL, 0))
goto failed_mount;
......@@ -1634,13 +1645,15 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
(sbi->s_inode_size & (sbi->s_inode_size - 1)) ||
(!is_power_of_2(sbi->s_inode_size)) ||
(sbi->s_inode_size > blocksize)) {
printk (KERN_ERR
"EXT4-fs: unsupported inode size: %d\n",
sbi->s_inode_size);
goto failed_mount;
}
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
}
sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
le32_to_cpu(es->s_log_frag_size);
......@@ -1803,6 +1816,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount3;
}
if (ext4_blocks_count(es) > 0xffffffffULL &&
!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_64BIT)) {
printk(KERN_ERR "ext4: Failed to set 64-bit journal feature\n");
goto failed_mount4;
}
/* We have now updated the journal if required, so we can
* validate the data journaling mode. */
switch (test_opt(sb, DATA_FLAGS)) {
......@@ -1857,6 +1877,32 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
}
ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
/* determine the minimum size of new large inodes, if present */
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
if (sbi->s_want_extra_isize <
le16_to_cpu(es->s_want_extra_isize))
sbi->s_want_extra_isize =
le16_to_cpu(es->s_want_extra_isize);
if (sbi->s_want_extra_isize <
le16_to_cpu(es->s_min_extra_isize))
sbi->s_want_extra_isize =
le16_to_cpu(es->s_min_extra_isize);
}
}
/* Check if enough inode space is available */
if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
sbi->s_inode_size) {
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
printk(KERN_INFO "EXT4-fs: required extra inode space not"
"available.\n");
}
/*
* akpm: core read_super() calls in here with the superblock locked.
* That deadlocks, because orphan cleanup needs to lock the superblock
......
......@@ -66,13 +66,6 @@
#define BFIRST(bh) ENTRY(BHDR(bh)+1)
#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
#define IHDR(inode, raw_inode) \
((struct ext4_xattr_ibody_header *) \
((void *)raw_inode + \
EXT4_GOOD_OLD_INODE_SIZE + \
EXT4_I(inode)->i_extra_isize))
#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
#ifdef EXT4_XATTR_DEBUG
# define ea_idebug(inode, f...) do { \
printk(KERN_DEBUG "inode %s:%lu: ", \
......@@ -508,6 +501,24 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
return;
}
/*
* Find the available free space for EAs. This also returns the total number of
* bytes used by EA entries.
*/
static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
size_t *min_offs, void *base, int *total)
{
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
*total += EXT4_XATTR_LEN(last->e_name_len);
if (!last->e_value_block && last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < *min_offs)
*min_offs = offs;
}
}
return (*min_offs - ((void *)last - base) - sizeof(__u32));
}
struct ext4_xattr_info {
int name_index;
const char *name;
......@@ -1013,7 +1024,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
}
if (!error) {
ext4_xattr_update_super_block(handle, inode->i_sb);
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
if (!value)
EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND;
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
/*
* The bh is consumed by ext4_mark_iloc_dirty, even with
......@@ -1066,6 +1079,253 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
return error;
}
/*
* Shift the EA entries in the inode to create space for the increased
* i_extra_isize.
*/
static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
int value_offs_shift, void *to,
void *from, size_t n, int blocksize)
{
struct ext4_xattr_entry *last = entry;
int new_offs;
/* Adjust the value offsets of the entries */
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
if (!last->e_value_block && last->e_value_size) {
new_offs = le16_to_cpu(last->e_value_offs) +
value_offs_shift;
BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
> blocksize);
last->e_value_offs = cpu_to_le16(new_offs);
}
}
/* Shift the entries by n bytes */
memmove(to, from, n);
}
/*
* Expand an inode by new_extra_isize bytes when EAs are present.
* Returns 0 on success or negative error number on failure.
*/
int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle)
{
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_entry *entry, *last, *first;
struct buffer_head *bh = NULL;
struct ext4_xattr_ibody_find *is = NULL;
struct ext4_xattr_block_find *bs = NULL;
char *buffer = NULL, *b_entry_name = NULL;
size_t min_offs, free;
int total_ino, total_blk;
void *base, *start, *end;
int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
int s_min_extra_isize = EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize;
down_write(&EXT4_I(inode)->xattr_sem);
retry:
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
up_write(&EXT4_I(inode)->xattr_sem);
return 0;
}
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
/*
* Check if enough free space is available in the inode to shift the
* entries ahead by new_extra_isize.
*/
base = start = entry;
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
min_offs = end - base;
last = entry;
total_ino = sizeof(struct ext4_xattr_ibody_header);
free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
if (free >= new_extra_isize) {
entry = IFIRST(header);
ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
- new_extra_isize, (void *)raw_inode +
EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
(void *)header, total_ino,
inode->i_sb->s_blocksize);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
error = 0;
goto cleanup;
}
/*
* Enough free space isn't available in the inode, check if
* EA block can hold new_extra_isize bytes.
*/
if (EXT4_I(inode)->i_file_acl) {
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
error = -EIO;
if (!bh)
goto cleanup;
if (ext4_xattr_check_block(bh)) {
ext4_error(inode->i_sb, __FUNCTION__,
"inode %lu: bad block %llu", inode->i_ino,
EXT4_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
}
base = BHDR(bh);
first = BFIRST(bh);
end = bh->b_data + bh->b_size;
min_offs = end - base;
free = ext4_xattr_free_space(first, &min_offs, base,
&total_blk);
if (free < new_extra_isize) {
if (!tried_min_extra_isize && s_min_extra_isize) {
tried_min_extra_isize++;
new_extra_isize = s_min_extra_isize;
brelse(bh);
goto retry;
}
error = -1;
goto cleanup;
}
} else {
free = inode->i_sb->s_blocksize;
}
while (new_extra_isize > 0) {
size_t offs, size, entry_size;
struct ext4_xattr_entry *small_entry = NULL;
struct ext4_xattr_info i = {
.value = NULL,
.value_len = 0,
};
unsigned int total_size; /* EA entry size + value size */
unsigned int shift_bytes; /* No. of bytes to shift EAs by? */
unsigned int min_total_size = ~0U;
is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
if (!is || !bs) {
error = -ENOMEM;
goto cleanup;
}
is->s.not_found = -ENODATA;
bs->s.not_found = -ENODATA;
is->iloc.bh = NULL;
bs->bh = NULL;
last = IFIRST(header);
/* Find the entry best suited to be pushed into EA block */
entry = NULL;
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
total_size =
EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
EXT4_XATTR_LEN(last->e_name_len);
if (total_size <= free && total_size < min_total_size) {
if (total_size < new_extra_isize) {
small_entry = last;
} else {
entry = last;
min_total_size = total_size;
}
}
}
if (entry == NULL) {
if (small_entry) {
entry = small_entry;
} else {
if (!tried_min_extra_isize &&
s_min_extra_isize) {
tried_min_extra_isize++;
new_extra_isize = s_min_extra_isize;
goto retry;
}
error = -1;
goto cleanup;
}
}
offs = le16_to_cpu(entry->e_value_offs);
size = le32_to_cpu(entry->e_value_size);
entry_size = EXT4_XATTR_LEN(entry->e_name_len);
i.name_index = entry->e_name_index,
buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS);
b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
if (!buffer || !b_entry_name) {
error = -ENOMEM;
goto cleanup;
}
/* Save the entry name and the entry value */
memcpy(buffer, (void *)IFIRST(header) + offs,
EXT4_XATTR_SIZE(size));
memcpy(b_entry_name, entry->e_name, entry->e_name_len);
b_entry_name[entry->e_name_len] = '\0';
i.name = b_entry_name;
error = ext4_get_inode_loc(inode, &is->iloc);
if (error)
goto cleanup;
error = ext4_xattr_ibody_find(inode, &i, is);
if (error)
goto cleanup;
/* Remove the chosen entry from the inode */
error = ext4_xattr_ibody_set(handle, inode, &i, is);
entry = IFIRST(header);
if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
shift_bytes = new_extra_isize;
else
shift_bytes = entry_size + size;
/* Adjust the offsets and shift the remaining entries ahead */
ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
shift_bytes, (void *)raw_inode +
EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
(void *)header, total_ino - entry_size,
inode->i_sb->s_blocksize);
extra_isize += shift_bytes;
new_extra_isize -= shift_bytes;
EXT4_I(inode)->i_extra_isize = extra_isize;
i.name = b_entry_name;
i.value = buffer;
i.value_len = cpu_to_le32(size);
error = ext4_xattr_block_find(inode, &i, bs);
if (error)
goto cleanup;
/* Add entry which was removed from the inode into the block */
error = ext4_xattr_block_set(handle, inode, &i, bs);
if (error)
goto cleanup;
kfree(b_entry_name);
kfree(buffer);
brelse(is->iloc.bh);
kfree(is);
kfree(bs);
}
brelse(bh);
up_write(&EXT4_I(inode)->xattr_sem);
return 0;
cleanup:
kfree(b_entry_name);
kfree(buffer);
if (is)
brelse(is->iloc.bh);
kfree(is);
kfree(bs);
brelse(bh);
up_write(&EXT4_I(inode)->xattr_sem);
return error;
}
/*
* ext4_xattr_delete_inode()
*
......
......@@ -56,6 +56,13 @@ struct ext4_xattr_entry {
#define EXT4_XATTR_SIZE(size) \
(((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
#define IHDR(inode, raw_inode) \
((struct ext4_xattr_ibody_header *) \
((void *)raw_inode + \
EXT4_GOOD_OLD_INODE_SIZE + \
EXT4_I(inode)->i_extra_isize))
#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
# ifdef CONFIG_EXT4DEV_FS_XATTR
extern struct xattr_handler ext4_xattr_user_handler;
......@@ -74,6 +81,9 @@ extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *,
extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
extern void ext4_xattr_put_super(struct super_block *);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
extern int init_ext4_xattr(void);
extern void exit_ext4_xattr(void);
......@@ -129,6 +139,13 @@ exit_ext4_xattr(void)
{
}
static inline int
ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle)
{
return -EOPNOTSUPP;
}
#define ext4_xattr_handlers NULL
# endif /* CONFIG_EXT4DEV_FS_XATTR */
......
......@@ -35,6 +35,7 @@
#include <linux/kthread.h>
#include <linux/poison.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <asm/uaccess.h>
#include <asm/page.h>
......@@ -528,7 +529,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
{
int err = 0;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
spin_lock(&journal->j_state_lock);
if (!tid_geq(journal->j_commit_request, tid)) {
printk(KERN_EMERG
......@@ -1709,7 +1710,7 @@ void jbd2_slab_free(void *ptr, size_t size)
* Journal_head storage management
*/
static struct kmem_cache *jbd2_journal_head_cache;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
static atomic_t nr_journal_heads = ATOMIC_INIT(0);
#endif
......@@ -1747,7 +1748,7 @@ static struct journal_head *journal_alloc_journal_head(void)
struct journal_head *ret;
static unsigned long last_warning;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
atomic_inc(&nr_journal_heads);
#endif
ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
......@@ -1768,7 +1769,7 @@ static struct journal_head *journal_alloc_journal_head(void)
static void journal_free_journal_head(struct journal_head *jh)
{
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
atomic_dec(&nr_journal_heads);
memset(jh, JBD_POISON_FREE, sizeof(*jh));
#endif
......@@ -1951,64 +1952,50 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
}
/*
* /proc tunables
* debugfs tunables
*/
#if defined(CONFIG_JBD_DEBUG)
int jbd2_journal_enable_debug;
#if defined(CONFIG_JBD2_DEBUG)
u8 jbd2_journal_enable_debug;
EXPORT_SYMBOL(jbd2_journal_enable_debug);
#endif
#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_DEBUG_FS)
static struct proc_dir_entry *proc_jbd_debug;
#define JBD2_DEBUG_NAME "jbd2-debug"
static int read_jbd_debug(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
int ret;
struct dentry *jbd2_debugfs_dir, *jbd2_debug;
ret = sprintf(page + off, "%d\n", jbd2_journal_enable_debug);
*eof = 1;
return ret;
static void __init jbd2_create_debugfs_entry(void)
{
jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL);
if (jbd2_debugfs_dir)
jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME, S_IRUGO,
jbd2_debugfs_dir,
&jbd2_journal_enable_debug);
}
static int write_jbd_debug(struct file *file, const char __user *buffer,
unsigned long count, void *data)
static void __exit jbd2_remove_debugfs_entry(void)
{
char buf[32];
if (count > ARRAY_SIZE(buf) - 1)
count = ARRAY_SIZE(buf) - 1;
if (copy_from_user(buf, buffer, count))
return -EFAULT;
buf[ARRAY_SIZE(buf) - 1] = '\0';
jbd2_journal_enable_debug = simple_strtoul(buf, NULL, 10);
return count;
if (jbd2_debug)
debugfs_remove(jbd2_debug);
if (jbd2_debugfs_dir)
debugfs_remove(jbd2_debugfs_dir);
}
#define JBD_PROC_NAME "sys/fs/jbd2-debug"
#else
static void __init create_jbd_proc_entry(void)
static void __init jbd2_create_debugfs_entry(void)
{
proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
if (proc_jbd_debug) {
/* Why is this so hard? */
proc_jbd_debug->read_proc = read_jbd_debug;
proc_jbd_debug->write_proc = write_jbd_debug;
}
do {
} while (0);
}
static void __exit jbd2_remove_jbd_proc_entry(void)
static void __exit jbd2_remove_debugfs_entry(void)
{
if (proc_jbd_debug)
remove_proc_entry(JBD_PROC_NAME, NULL);
do {
} while (0);
}
#else
#define create_jbd_proc_entry() do {} while (0)
#define jbd2_remove_jbd_proc_entry() do {} while (0)
#endif
struct kmem_cache *jbd2_handle_cache;
......@@ -2067,18 +2054,18 @@ static int __init journal_init(void)
ret = journal_init_caches();
if (ret != 0)
jbd2_journal_destroy_caches();
create_jbd_proc_entry();
jbd2_create_debugfs_entry();
return ret;
}
static void __exit journal_exit(void)
{
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
int n = atomic_read(&nr_journal_heads);
if (n)
printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
#endif
jbd2_remove_jbd_proc_entry();
jbd2_remove_debugfs_entry();
jbd2_journal_destroy_caches();
}
......
......@@ -295,7 +295,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
printk(KERN_ERR "JBD: error %d scanning journal\n", err);
++journal->j_transaction_sequence;
} else {
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
#endif
jbd_debug(0,
......
......@@ -26,6 +26,7 @@
#include <linux/syscalls.h>
#include <linux/rcupdate.h>
#include <linux/audit.h>
#include <linux/falloc.h>
int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
......@@ -352,6 +353,64 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
}
#endif
asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len)
{
struct file *file;
struct inode *inode;
long ret = -EINVAL;
if (offset < 0 || len <= 0)
goto out;
/* Return error if mode is not supported */
ret = -EOPNOTSUPP;
if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
goto out;
ret = -EBADF;
file = fget(fd);
if (!file)
goto out;
if (!(file->f_mode & FMODE_WRITE))
goto out_fput;
/*
* Revalidate the write permissions, in case security policy has
* changed since the files were opened.
*/
ret = security_file_permission(file, MAY_WRITE);
if (ret)
goto out_fput;
inode = file->f_path.dentry->d_inode;
ret = -ESPIPE;
if (S_ISFIFO(inode->i_mode))
goto out_fput;
ret = -ENODEV;
/*
* Let individual file system decide if it supports preallocation
* for directories or not.
*/
if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
goto out_fput;
ret = -EFBIG;
/* Check for wrap through zero too */
if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
goto out_fput;
if (inode->i_op && inode->i_op->fallocate)
ret = inode->i_op->fallocate(inode, mode, offset, len);
else
ret = -ENOSYS;
out_fput:
fput(file);
out:
return ret;
}
/*
* access() needs to use the real uid/gid, not the effective uid/gid.
* We do this by temporarily clearing all FS-related capabilities and
......
......@@ -329,10 +329,11 @@
#define __NR_signalfd 321
#define __NR_timerfd 322
#define __NR_eventfd 323
#define __NR_fallocate 324
#ifdef __KERNEL__
#define NR_syscalls 324
#define NR_syscalls 325
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
......
......@@ -308,6 +308,7 @@ COMPAT_SYS_SPU(move_pages)
SYSCALL_SPU(getcpu)
COMPAT_SYS(epoll_pwait)
COMPAT_SYS_SPU(utimensat)
COMPAT_SYS(fallocate)
COMPAT_SYS_SPU(signalfd)
COMPAT_SYS_SPU(timerfd)
SYSCALL_SPU(eventfd)
......
......@@ -331,10 +331,11 @@
#define __NR_timerfd 306
#define __NR_eventfd 307
#define __NR_sync_file_range2 308
#define __NR_fallocate 309
#ifdef __KERNEL__
#define __NR_syscalls 309
#define __NR_syscalls 310
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
......
......@@ -630,6 +630,8 @@ __SYSCALL(__NR_signalfd, sys_signalfd)
__SYSCALL(__NR_timerfd, sys_timerfd)
#define __NR_eventfd 284
__SYSCALL(__NR_eventfd, sys_eventfd)
#define __NR_fallocate 285
__SYSCALL(__NR_fallocate, sys_fallocate)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
......
......@@ -71,7 +71,7 @@
/*
* Maximal count of links to a file
*/
#define EXT4_LINK_MAX 32000
#define EXT4_LINK_MAX 65000
/*
* Macro-instructions used to manage several block sizes
......@@ -102,6 +102,7 @@
EXT4_GOOD_OLD_FIRST_INO : \
(s)->s_first_ino)
#endif
#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
/*
* Macro-instructions used to manage fragments
......@@ -201,6 +202,7 @@ struct ext4_group_desc
#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
/* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input {
......@@ -225,6 +227,11 @@ struct ext4_new_group_data {
__u32 free_blocks_count;
};
/*
* Following is used by preallocation code to tell get_blocks() that we
* want uninitialzed extents.
*/
#define EXT4_CREATE_UNINITIALIZED_EXT 2
/*
* ioctl commands
......@@ -237,7 +244,7 @@ struct ext4_new_group_data {
#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
#endif
#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
......@@ -253,7 +260,7 @@ struct ext4_new_group_data {
#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
#endif
#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
......@@ -282,7 +289,7 @@ struct ext4_inode {
__le16 i_uid; /* Low 16 bits of Owner Uid */
__le32 i_size; /* Size in bytes */
__le32 i_atime; /* Access time */
__le32 i_ctime; /* Creation time */
__le32 i_ctime; /* Inode Change time */
__le32 i_mtime; /* Modification time */
__le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */
......@@ -331,10 +338,85 @@ struct ext4_inode {
} osd2; /* OS dependent 2 */
__le16 i_extra_isize;
__le16 i_pad1;
__le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
__le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
__le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
__le32 i_crtime; /* File Creation time */
__le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
};
#define i_size_high i_dir_acl
#define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
/*
* Extended fields will fit into an inode if the filesystem was formatted
* with large inodes (-I 256 or larger) and there are not currently any EAs
* consuming all of the available space. For new inodes we always reserve
* enough space for the kernel's known extended fields, but for inodes
* created with an old kernel this might not have been the case. None of
* the extended inode fields is critical for correct filesystem operation.
* This macro checks if a certain field fits in the inode. Note that
* inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
*/
#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
((offsetof(typeof(*ext4_inode), field) + \
sizeof((ext4_inode)->field)) \
<= (EXT4_GOOD_OLD_INODE_SIZE + \
(einode)->i_extra_isize)) \
static inline __le32 ext4_encode_extra_time(struct timespec *time)
{
return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
time->tv_sec >> 32 : 0) |
((time->tv_nsec << 2) & EXT4_NSEC_MASK));
}
static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
{
if (sizeof(time->tv_sec) > 4)
time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
<< 32;
time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
}
#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
do { \
(raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
(raw_inode)->xtime ## _extra = \
ext4_encode_extra_time(&(inode)->xtime); \
} while (0)
#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
(raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
(raw_inode)->xtime ## _extra = \
ext4_encode_extra_time(&(einode)->xtime); \
} while (0)
#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
do { \
(inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
ext4_decode_extra_time(&(inode)->xtime, \
raw_inode->xtime ## _extra); \
} while (0)
#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
(einode)->xtime.tv_sec = \
(signed)le32_to_cpu((raw_inode)->xtime); \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
ext4_decode_extra_time(&(einode)->xtime, \
raw_inode->xtime ## _extra); \
} while (0)
#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1 osd1.linux1.l_i_reserved1
#define i_frag osd2.linux2.l_i_frag
......@@ -533,6 +615,13 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
return container_of(inode, struct ext4_inode_info, vfs_inode);
}
static inline struct timespec ext4_current_time(struct inode *inode)
{
return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
}
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
......@@ -603,6 +692,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
......@@ -620,6 +711,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
EXT4_FEATURE_INCOMPAT_64BIT)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
/*
......@@ -862,6 +955,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern void ext4_truncate (struct inode *);
extern void ext4_set_inode_flags(struct inode *);
extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
......@@ -983,6 +1077,8 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
extern void ext4_ext_truncate(struct inode *, struct page *);
extern void ext4_ext_init(struct super_block *);
extern void ext4_ext_release(struct super_block *);
extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
loff_t len);
static inline int
ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
unsigned long max_blocks, struct buffer_head *bh,
......
......@@ -141,7 +141,25 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
#define EXT_MAX_BLOCK 0xffffffff
#define EXT_MAX_LEN ((1UL << 15) - 1)
/*
* EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
* initialized extent. This is 2^15 and not (2^16 - 1), since we use the
* MSB of ee_len field in the extent datastructure to signify if this
* particular extent is an initialized extent or an uninitialized (i.e.
* preallocated).
* EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
* uninitialized extent.
* If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
* uninitialized one. In other words, if MSB of ee_len is set, it is an
* uninitialized extent with only one special scenario when ee_len = 0x8000.
* In this case we can not have an uninitialized extent of zero length and
* thus we make it as a special case of initialized extent with 0x8000 length.
* This way we get better extent-to-group alignment for initialized extents.
* Hence, the maximum number of blocks we can have in an *initialized*
* extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
*/
#define EXT_INIT_MAX_LEN (1UL << 15)
#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
#define EXT_FIRST_EXTENT(__hdr__) \
......@@ -188,8 +206,31 @@ ext4_ext_invalidate_cache(struct inode *inode)
EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
}
static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
{
/* We can not have an uninitialized extent of zero length! */
BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
}
static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
{
/* Extent with ee_len of 0x8000 is treated as an initialized extent */
return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
}
static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
{
return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
le16_to_cpu(ext->ee_len) :
(le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
}
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
extern int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *);
extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
......
......@@ -153,6 +153,11 @@ struct ext4_inode_info {
unsigned long i_ext_generation;
struct ext4_ext_cache i_cached_extent;
/*
* File creation time. Its function is same as that of
* struct timespec i_{a,c,m}time in the generic inode.
*/
struct timespec i_crtime;
};
#endif /* _LINUX_EXT4_FS_I */
......@@ -73,7 +73,7 @@ struct ext4_sb_info {
struct list_head s_orphan;
unsigned long s_commit_interval;
struct block_device *journal_bdev;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
#endif
......@@ -81,6 +81,7 @@ struct ext4_sb_info {
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
#ifdef EXTENTS_STATS
/* ext4 extents stats */
......
#ifndef _FALLOC_H_
#define _FALLOC_H_
#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
#endif /* _FALLOC_H_ */
......@@ -1147,6 +1147,8 @@ struct inode_operations {
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
void (*truncate_range)(struct inode *, loff_t, loff_t);
long (*fallocate)(struct inode *inode, int mode, loff_t offset,
loff_t len);
};
struct seq_file;
......
......@@ -50,14 +50,14 @@
*/
#define JBD_DEFAULT_MAX_COMMIT_AGE 5
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
/*
* Define JBD_EXPENSIVE_CHECKING to enable more expensive internal
* consistency checks. By default we don't do this unless
* CONFIG_JBD_DEBUG is on.
* CONFIG_JBD2_DEBUG is on.
*/
#define JBD_EXPENSIVE_CHECKING
extern int jbd2_journal_enable_debug;
extern u8 jbd2_journal_enable_debug;
#define jbd_debug(n, f, a...) \
do { \
......
......@@ -610,6 +610,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
const struct itimerspec __user *utmr);
asmlinkage long sys_eventfd(unsigned int count);
asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册