提交 d756d10e 编写于 作者: L Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: extent macros cleanup
  Fix compilation with EXT_DEBUG, also fix leXX_to_cpu conversions.
  ext4: remove extra IS_RDONLY() check
  ext4: Use is_power_of_2()
  Use zero_user_page() in ext4 where possible
  ext4: Remove 65000 subdirectory limit
  ext4: Expand extra_inodes space per the s_{want,min}_extra_isize fields 
  ext4: Add nanosecond timestamps
  jbd2: Move jbd2-debug file to debugfs
  jbd2: Fix CONFIG_JBD_DEBUG ifdef to be CONFIG_JBD2_DEBUG
  ext4: Set the journal JBD2_FEATURE_INCOMPAT_64BIT on large devices
  ext4: Make extents code sanely handle on-disk corruption
  ext4: copy i_flags to inode flags on write
  ext4: Enable extents by default
  Change on-disk format to support 2^15 uninitialized extents
  write support for preallocated blocks
  fallocate support in ext4
  sys_fallocate() implementation on i386, x86_64 and powerpc
......@@ -323,3 +323,4 @@ ENTRY(sys_call_table)
.long sys_signalfd
.long sys_timerfd
.long sys_eventfd
.long sys_fallocate
......@@ -773,6 +773,13 @@ asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
return sys_truncate(path, (high << 32) | low);
}
asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
u32 lenhi, u32 lenlo)
{
return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
((loff_t)lenhi << 32) | lenlo);
}
asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high,
unsigned long low)
{
......
......@@ -719,4 +719,5 @@ ia32_sys_call_table:
.quad compat_sys_signalfd
.quad compat_sys_timerfd
.quad sys_eventfd
.quad sys32_fallocate
ia32_syscall_end:
......@@ -879,3 +879,11 @@ asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
len, advice);
}
asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
unsigned offset_hi, unsigned len_lo,
unsigned len_hi)
{
return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
((u64)len_hi << 32) | len_lo);
}
......@@ -251,7 +251,7 @@ config JBD2
config JBD2_DEBUG
bool "JBD2 (ext4dev/ext4) debugging support"
depends on JBD2
depends on JBD2 && DEBUG_FS
help
If you are using the ext4dev/ext4 journaled file system (or
potentially any other filesystem/device using JBD2), this option
......@@ -260,10 +260,10 @@ config JBD2_DEBUG
By default, the debugging output will be turned off.
If you select Y here, then you will be able to turn on debugging
with "echo N > /proc/sys/fs/jbd2-debug", where N is a number between
1 and 5. The higher the number, the more debugging output is
generated. To turn debugging off again, do
"echo 0 > /proc/sys/fs/jbd2-debug".
with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a
number between 1 and 5. The higher the number, the more debugging
output is generated. To turn debugging off again, do
"echo 0 > /sys/kernel/debug/jbd2/jbd2-debug".
config FS_MBCACHE
# Meta block cache for Extended Attributes (ext2/ext3/ext4)
......
......@@ -517,7 +517,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
/*
* An HJ special. This is expensive...
*/
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
jbd_unlock_bh_state(bitmap_bh);
{
struct buffer_head *debug_bh;
......@@ -1597,7 +1597,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
performed_allocation = 1;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
{
struct buffer_head *debug_bh;
......
......@@ -39,6 +39,7 @@
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/falloc.h>
#include <linux/ext4_fs_extents.h>
#include <asm/uaccess.h>
......@@ -91,36 +92,6 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}
static int ext4_ext_check_header(const char *function, struct inode *inode,
struct ext4_extent_header *eh)
{
const char *error_msg = NULL;
if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
error_msg = "invalid magic";
goto corrupted;
}
if (unlikely(eh->eh_max == 0)) {
error_msg = "invalid eh_max";
goto corrupted;
}
if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
error_msg = "invalid eh_entries";
goto corrupted;
}
return 0;
corrupted:
ext4_error(inode->i_sb, function,
"bad header in inode #%lu: %s - magic %x, "
"entries %u, max %u, depth %u",
inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
le16_to_cpu(eh->eh_depth));
return -EIO;
}
static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed)
{
int err;
......@@ -269,6 +240,70 @@ static int ext4_ext_space_root_idx(struct inode *inode)
return size;
}
static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
int max;
if (depth == ext_depth(inode)) {
if (depth == 0)
max = ext4_ext_space_root(inode);
else
max = ext4_ext_space_root_idx(inode);
} else {
if (depth == 0)
max = ext4_ext_space_block(inode);
else
max = ext4_ext_space_block_idx(inode);
}
return max;
}
static int __ext4_ext_check_header(const char *function, struct inode *inode,
struct ext4_extent_header *eh,
int depth)
{
const char *error_msg;
int max = 0;
if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
error_msg = "invalid magic";
goto corrupted;
}
if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
error_msg = "unexpected eh_depth";
goto corrupted;
}
if (unlikely(eh->eh_max == 0)) {
error_msg = "invalid eh_max";
goto corrupted;
}
max = ext4_ext_max_entries(inode, depth);
if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
error_msg = "too large eh_max";
goto corrupted;
}
if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
error_msg = "invalid eh_entries";
goto corrupted;
}
return 0;
corrupted:
ext4_error(inode->i_sb, function,
"bad header in inode #%lu: %s - magic %x, "
"entries %u, max %u(%u), depth %u(%u)",
inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
max, le16_to_cpu(eh->eh_depth), depth);
return -EIO;
}
#define ext4_ext_check_header(inode, eh, depth) \
__ext4_ext_check_header(__FUNCTION__, inode, eh, depth)
#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
......@@ -282,7 +317,7 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
} else if (path->p_ext) {
ext_debug(" %d:%d:%llu ",
le32_to_cpu(path->p_ext->ee_block),
le16_to_cpu(path->p_ext->ee_len),
ext4_ext_get_actual_len(path->p_ext),
ext_pblock(path->p_ext));
} else
ext_debug(" []");
......@@ -305,7 +340,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block),
le16_to_cpu(ex->ee_len), ext_pblock(ex));
ext4_ext_get_actual_len(ex), ext_pblock(ex));
}
ext_debug("\n");
}
......@@ -329,6 +364,7 @@ static void ext4_ext_drop_refs(struct ext4_ext_path *path)
/*
* ext4_ext_binsearch_idx:
* binary search for the closest index of the given block
* the header must be checked before calling this
*/
static void
ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block)
......@@ -336,27 +372,25 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc
struct ext4_extent_header *eh = path->p_hdr;
struct ext4_extent_idx *r, *l, *m;
BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC);
BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max));
BUG_ON(le16_to_cpu(eh->eh_entries) <= 0);
ext_debug("binsearch for %d(idx): ", block);
l = EXT_FIRST_INDEX(eh) + 1;
r = EXT_FIRST_INDEX(eh) + le16_to_cpu(eh->eh_entries) - 1;
r = EXT_LAST_INDEX(eh);
while (l <= r) {
m = l + (r - l) / 2;
if (block < le32_to_cpu(m->ei_block))
r = m - 1;
else
l = m + 1;
ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ei_block,
m, m->ei_block, r, r->ei_block);
ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
m, le32_to_cpu(m->ei_block),
r, le32_to_cpu(r->ei_block));
}
path->p_idx = l - 1;
ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
idx_block(path->p_idx));
idx_pblock(path->p_idx));
#ifdef CHECK_BINSEARCH
{
......@@ -388,6 +422,7 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc
/*
* ext4_ext_binsearch:
* binary search for closest extent of the given block
* the header must be checked before calling this
*/
static void
ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
......@@ -395,9 +430,6 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
struct ext4_extent_header *eh = path->p_hdr;
struct ext4_extent *r, *l, *m;
BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC);
BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max));
if (eh->eh_entries == 0) {
/*
* this leaf is empty:
......@@ -409,7 +441,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
ext_debug("binsearch for %d: ", block);
l = EXT_FIRST_EXTENT(eh) + 1;
r = EXT_FIRST_EXTENT(eh) + le16_to_cpu(eh->eh_entries) - 1;
r = EXT_LAST_EXTENT(eh);
while (l <= r) {
m = l + (r - l) / 2;
......@@ -417,15 +449,16 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
r = m - 1;
else
l = m + 1;
ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block,
m, m->ee_block, r, r->ee_block);
ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
m, le32_to_cpu(m->ee_block),
r, le32_to_cpu(r->ee_block));
}
path->p_ext = l - 1;
ext_debug(" -> %d:%llu:%d ",
le32_to_cpu(path->p_ext->ee_block),
ext_pblock(path->p_ext),
le16_to_cpu(path->p_ext->ee_len));
ext4_ext_get_actual_len(path->p_ext));
#ifdef CHECK_BINSEARCH
{
......@@ -468,11 +501,10 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path)
short int depth, i, ppos = 0, alloc = 0;
eh = ext_inode_hdr(inode);
BUG_ON(eh == NULL);
if (ext4_ext_check_header(__FUNCTION__, inode, eh))
depth = ext_depth(inode);
if (ext4_ext_check_header(inode, eh, depth))
return ERR_PTR(-EIO);
i = depth = ext_depth(inode);
/* account possible depth increase */
if (!path) {
......@@ -484,10 +516,12 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path)
}
path[0].p_hdr = eh;
i = depth;
/* walk through the tree */
while (i) {
ext_debug("depth %d: num %d, max %d\n",
ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
ext4_ext_binsearch_idx(inode, path + ppos, block);
path[ppos].p_block = idx_pblock(path[ppos].p_idx);
path[ppos].p_depth = i;
......@@ -504,7 +538,7 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path)
path[ppos].p_hdr = eh;
i--;
if (ext4_ext_check_header(__FUNCTION__, inode, eh))
if (ext4_ext_check_header(inode, eh, i))
goto err;
}
......@@ -513,9 +547,6 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path)
path[ppos].p_ext = NULL;
path[ppos].p_idx = NULL;
if (ext4_ext_check_header(__FUNCTION__, inode, eh))
goto err;
/* find extent */
ext4_ext_binsearch(inode, path + ppos, block);
......@@ -553,7 +584,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
len = (len - 1) * sizeof(struct ext4_extent_idx);
len = len < 0 ? 0 : len;
ext_debug("insert new index %d after: %d. "
ext_debug("insert new index %d after: %llu. "
"move %d from 0x%p to 0x%p\n",
logical, ptr, len,
(curp->p_idx + 1), (curp->p_idx + 2));
......@@ -564,7 +595,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
/* insert before */
len = len * sizeof(struct ext4_extent_idx);
len = len < 0 ? 0 : len;
ext_debug("insert new index %d before: %d. "
ext_debug("insert new index %d before: %llu. "
"move %d from 0x%p to 0x%p\n",
logical, ptr, len,
curp->p_idx, (curp->p_idx + 1));
......@@ -686,7 +717,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
ext_debug("move %d:%llu:%d in new leaf %llu\n",
le32_to_cpu(path[depth].p_ext->ee_block),
ext_pblock(path[depth].p_ext),
le16_to_cpu(path[depth].p_ext->ee_len),
ext4_ext_get_actual_len(path[depth].p_ext),
newblock);
/*memmove(ex++, path[depth].p_ext++,
sizeof(struct ext4_extent));
......@@ -764,7 +795,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
EXT_LAST_INDEX(path[i].p_hdr));
while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
ext_debug("%d: move %d:%d in new index %llu\n", i,
ext_debug("%d: move %d:%llu in new index %llu\n", i,
le32_to_cpu(path[i].p_idx->ei_block),
idx_pblock(path[i].p_idx),
newblock);
......@@ -893,8 +924,13 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode));
curp->p_hdr->eh_entries = cpu_to_le16(1);
curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
/* FIXME: it works, but actually path[0] can be index */
curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
if (path[0].p_hdr->eh_depth)
curp->p_idx->ei_block =
EXT_FIRST_INDEX(path[0].p_hdr)->ei_block;
else
curp->p_idx->ei_block =
EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
ext4_idx_store_pblock(curp->p_idx, newblock);
neh = ext_inode_hdr(inode);
......@@ -1106,7 +1142,24 @@ static int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
struct ext4_extent *ex2)
{
if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len) !=
unsigned short ext1_ee_len, ext2_ee_len, max_len;
/*
* Make sure that either both extents are uninitialized, or
* both are _not_.
*/
if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
return 0;
if (ext4_ext_is_uninitialized(ex1))
max_len = EXT_UNINIT_MAX_LEN;
else
max_len = EXT_INIT_MAX_LEN;
ext1_ee_len = ext4_ext_get_actual_len(ex1);
ext2_ee_len = ext4_ext_get_actual_len(ex2);
if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
le32_to_cpu(ex2->ee_block))
return 0;
......@@ -1115,18 +1168,65 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
* as an RO_COMPAT feature, refuse to merge to extents if
* this can result in the top bit of ee_len being set.
*/
if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN)
if (ext1_ee_len + ext2_ee_len > max_len)
return 0;
#ifdef AGGRESSIVE_TEST
if (le16_to_cpu(ex1->ee_len) >= 4)
return 0;
#endif
if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2))
if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2))
return 1;
return 0;
}
/*
* This function tries to merge the "ex" extent to the next extent in the tree.
* It always tries to merge towards right. If you want to merge towards
* left, pass "ex - 1" as argument instead of "ex".
* Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
* 1 if they got merged.
*/
int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *ex)
{
struct ext4_extent_header *eh;
unsigned int depth, len;
int merge_done = 0;
int uninitialized = 0;
depth = ext_depth(inode);
BUG_ON(path[depth].p_hdr == NULL);
eh = path[depth].p_hdr;
while (ex < EXT_LAST_EXTENT(eh)) {
if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
break;
/* merge with next extent! */
if (ext4_ext_is_uninitialized(ex))
uninitialized = 1;
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(ex + 1));
if (uninitialized)
ext4_ext_mark_uninitialized(ex);
if (ex + 1 < EXT_LAST_EXTENT(eh)) {
len = (EXT_LAST_EXTENT(eh) - ex - 1)
* sizeof(struct ext4_extent);
memmove(ex + 1, ex + 2, len);
}
eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1);
merge_done = 1;
WARN_ON(eh->eh_entries == 0);
if (!eh->eh_entries)
ext4_error(inode->i_sb, "ext4_ext_try_to_merge",
"inode#%lu, eh->eh_entries = 0!", inode->i_ino);
}
return merge_done;
}
/*
* check if a portion of the "newext" extent overlaps with an
* existing extent.
......@@ -1144,7 +1244,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
unsigned int ret = 0;
b1 = le32_to_cpu(newext->ee_block);
len1 = le16_to_cpu(newext->ee_len);
len1 = ext4_ext_get_actual_len(newext);
depth = ext_depth(inode);
if (!path[depth].p_ext)
goto out;
......@@ -1191,8 +1291,9 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
struct ext4_extent *nearex; /* nearest extent */
struct ext4_ext_path *npath = NULL;
int depth, len, err, next;
unsigned uninitialized = 0;
BUG_ON(newext->ee_len == 0);
BUG_ON(ext4_ext_get_actual_len(newext) == 0);
depth = ext_depth(inode);
ex = path[depth].p_ext;
BUG_ON(path[depth].p_hdr == NULL);
......@@ -1200,14 +1301,24 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
/* try to insert block into found extent and return */
if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append %d block to %d:%d (from %llu)\n",
le16_to_cpu(newext->ee_len),
ext4_ext_get_actual_len(newext),
le32_to_cpu(ex->ee_block),
le16_to_cpu(ex->ee_len), ext_pblock(ex));
ext4_ext_get_actual_len(ex), ext_pblock(ex));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
return err;
ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len)
+ le16_to_cpu(newext->ee_len));
/*
* ext4_can_extents_be_merged should have checked that either
* both extents are uninitialized, or both aren't. Thus we
* need to check only one of them here.
*/
if (ext4_ext_is_uninitialized(ex))
uninitialized = 1;
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(newext));
if (uninitialized)
ext4_ext_mark_uninitialized(ex);
eh = path[depth].p_hdr;
nearex = ex;
goto merge;
......@@ -1263,7 +1374,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
ext_debug("first extent in the leaf: %d:%llu:%d\n",
le32_to_cpu(newext->ee_block),
ext_pblock(newext),
le16_to_cpu(newext->ee_len));
ext4_ext_get_actual_len(newext));
path[depth].p_ext = EXT_FIRST_EXTENT(eh);
} else if (le32_to_cpu(newext->ee_block)
> le32_to_cpu(nearex->ee_block)) {
......@@ -1276,7 +1387,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
"move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
ext_pblock(newext),
le16_to_cpu(newext->ee_len),
ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2);
memmove(nearex + 2, nearex + 1, len);
}
......@@ -1289,7 +1400,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
"move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
ext_pblock(newext),
le16_to_cpu(newext->ee_len),
ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2);
memmove(nearex + 1, nearex, len);
path[depth].p_ext = nearex;
......@@ -1304,20 +1415,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
merge:
/* try to merge extents to the right */
while (nearex < EXT_LAST_EXTENT(eh)) {
if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1))
break;
/* merge with next extent! */
nearex->ee_len = cpu_to_le16(le16_to_cpu(nearex->ee_len)
+ le16_to_cpu(nearex[1].ee_len));
if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
len = (EXT_LAST_EXTENT(eh) - nearex - 1)
* sizeof(struct ext4_extent);
memmove(nearex + 1, nearex + 2, len);
}
eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
BUG_ON(eh->eh_entries == 0);
}
ext4_ext_try_to_merge(inode, path, nearex);
/* try to merge extents to the left */
......@@ -1379,8 +1477,8 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
end = le32_to_cpu(ex->ee_block);
if (block + num < end)
end = block + num;
} else if (block >=
le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)) {
} else if (block >= le32_to_cpu(ex->ee_block)
+ ext4_ext_get_actual_len(ex)) {
/* need to allocate space after found extent */
start = block;
end = block + num;
......@@ -1392,7 +1490,8 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
* by found extent
*/
start = block;
end = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len);
end = le32_to_cpu(ex->ee_block)
+ ext4_ext_get_actual_len(ex);
if (block + num < end)
end = block + num;
exists = 1;
......@@ -1408,7 +1507,7 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
cbex.ec_type = EXT4_EXT_CACHE_GAP;
} else {
cbex.ec_block = le32_to_cpu(ex->ee_block);
cbex.ec_len = le16_to_cpu(ex->ee_len);
cbex.ec_len = ext4_ext_get_actual_len(ex);
cbex.ec_start = ext_pblock(ex);
cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
}
......@@ -1481,15 +1580,15 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
ext_debug("cache gap(before): %lu [%lu:%lu]",
(unsigned long) block,
(unsigned long) le32_to_cpu(ex->ee_block),
(unsigned long) le16_to_cpu(ex->ee_len));
(unsigned long) ext4_ext_get_actual_len(ex));
} else if (block >= le32_to_cpu(ex->ee_block)
+ le16_to_cpu(ex->ee_len)) {
+ ext4_ext_get_actual_len(ex)) {
lblock = le32_to_cpu(ex->ee_block)
+ le16_to_cpu(ex->ee_len);
+ ext4_ext_get_actual_len(ex);
len = ext4_ext_next_allocated_block(path);
ext_debug("cache gap(after): [%lu:%lu] %lu",
(unsigned long) le32_to_cpu(ex->ee_block),
(unsigned long) le16_to_cpu(ex->ee_len),
(unsigned long) ext4_ext_get_actual_len(ex),
(unsigned long) block);
BUG_ON(len == lblock);
len = len - lblock;
......@@ -1619,12 +1718,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
unsigned long from, unsigned long to)
{
struct buffer_head *bh;
unsigned short ee_len = ext4_ext_get_actual_len(ex);
int i;
#ifdef EXTENTS_STATS
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned short ee_len = le16_to_cpu(ex->ee_len);
spin_lock(&sbi->s_ext_stats_lock);
sbi->s_ext_blocks += ee_len;
sbi->s_ext_extents++;
......@@ -1638,12 +1737,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
}
#endif
if (from >= le32_to_cpu(ex->ee_block)
&& to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) {
&& to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
/* tail removal */
unsigned long num;
ext4_fsblk_t start;
num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from;
start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num;
num = le32_to_cpu(ex->ee_block) + ee_len - from;
start = ext_pblock(ex) + ee_len - num;
ext_debug("free last %lu blocks starting %llu\n", num, start);
for (i = 0; i < num; i++) {
bh = sb_find_get_block(inode->i_sb, start + i);
......@@ -1651,12 +1750,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
}
ext4_free_blocks(handle, inode, start, num);
} else if (from == le32_to_cpu(ex->ee_block)
&& to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) {
&& to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
printk("strange request: removal %lu-%lu from %u:%u\n",
from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len));
from, to, le32_to_cpu(ex->ee_block), ee_len);
} else {
printk("strange request: removal(2) %lu-%lu from %u:%u\n",
from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len));
from, to, le32_to_cpu(ex->ee_block), ee_len);
}
return 0;
}
......@@ -1671,21 +1770,23 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
unsigned a, b, block, num;
unsigned long ex_ee_block;
unsigned short ex_ee_len;
unsigned uninitialized = 0;
struct ext4_extent *ex;
/* the header must be checked already in ext4_ext_remove_space() */
ext_debug("truncate since %lu in leaf\n", start);
if (!path[depth].p_hdr)
path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
eh = path[depth].p_hdr;
BUG_ON(eh == NULL);
BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max));
BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC);
/* find where to start removing */
ex = EXT_LAST_EXTENT(eh);
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = le16_to_cpu(ex->ee_len);
if (ext4_ext_is_uninitialized(ex))
uninitialized = 1;
ex_ee_len = ext4_ext_get_actual_len(ex);
while (ex >= EXT_FIRST_EXTENT(eh) &&
ex_ee_block + ex_ee_len > start) {
......@@ -1753,6 +1854,12 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ex->ee_block = cpu_to_le32(block);
ex->ee_len = cpu_to_le16(num);
/*
* Do not mark uninitialized if all the blocks in the
* extent have been removed.
*/
if (uninitialized && num)
ext4_ext_mark_uninitialized(ex);
err = ext4_ext_dirty(handle, inode, path + depth);
if (err)
......@@ -1762,7 +1869,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ext_pblock(ex));
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = le16_to_cpu(ex->ee_len);
ex_ee_len = ext4_ext_get_actual_len(ex);
}
if (correct_index && eh->eh_entries)
......@@ -1825,7 +1932,7 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
return -ENOMEM;
}
path[0].p_hdr = ext_inode_hdr(inode);
if (ext4_ext_check_header(__FUNCTION__, inode, path[0].p_hdr)) {
if (ext4_ext_check_header(inode, path[0].p_hdr, depth)) {
err = -EIO;
goto out;
}
......@@ -1846,17 +1953,8 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
if (!path[i].p_hdr) {
ext_debug("initialize header\n");
path[i].p_hdr = ext_block_hdr(path[i].p_bh);
if (ext4_ext_check_header(__FUNCTION__, inode,
path[i].p_hdr)) {
err = -EIO;
goto out;
}
}
BUG_ON(le16_to_cpu(path[i].p_hdr->eh_entries)
> le16_to_cpu(path[i].p_hdr->eh_max));
BUG_ON(path[i].p_hdr->eh_magic != EXT4_EXT_MAGIC);
if (!path[i].p_idx) {
/* this level hasn't been touched yet */
path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
......@@ -1873,17 +1971,27 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
i, EXT_FIRST_INDEX(path[i].p_hdr),
path[i].p_idx);
if (ext4_ext_more_to_rm(path + i)) {
struct buffer_head *bh;
/* go to the next level */
ext_debug("move to level %d (block %llu)\n",
i + 1, idx_pblock(path[i].p_idx));
memset(path + i + 1, 0, sizeof(*path));
path[i+1].p_bh =
sb_bread(sb, idx_pblock(path[i].p_idx));
if (!path[i+1].p_bh) {
bh = sb_bread(sb, idx_pblock(path[i].p_idx));
if (!bh) {
/* should we reset i_size? */
err = -EIO;
break;
}
if (WARN_ON(i + 1 > depth)) {
err = -EIO;
break;
}
if (ext4_ext_check_header(inode, ext_block_hdr(bh),
depth - i - 1)) {
err = -EIO;
break;
}
path[i + 1].p_bh = bh;
/* save actual number of indexes since this
* number is changed at the next iteration */
......@@ -1977,15 +2085,158 @@ void ext4_ext_release(struct super_block *sb)
#endif
}
/*
* This function is called by ext4_ext_get_blocks() if someone tries to write
* to an uninitialized extent. It may result in splitting the uninitialized
* extent into multiple extents (upto three - one initialized and two
* uninitialized).
* There are three possibilities:
* a> There is no split required: Entire extent should be initialized
* b> Splits in two extents: Write is happening at either end of the extent
* c> Splits in three extents: Somone is writing in middle of the extent
*/
int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
ext4_fsblk_t iblock,
unsigned long max_blocks)
{
struct ext4_extent *ex, newex;
struct ext4_extent *ex1 = NULL;
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
struct ext4_extent_header *eh;
unsigned int allocated, ee_block, ee_len, depth;
ext4_fsblk_t newblock;
int err = 0;
int ret = 0;
depth = ext_depth(inode);
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (iblock - ee_block);
newblock = iblock - ee_block + ext_pblock(ex);
ex2 = ex;
/* ex1: ee_block to iblock - 1 : uninitialized */
if (iblock > ee_block) {
ex1 = ex;
ex1->ee_len = cpu_to_le16(iblock - ee_block);
ext4_ext_mark_uninitialized(ex1);
ex2 = &newex;
}
/*
* for sanity, update the length of the ex2 extent before
* we insert ex3, if ex1 is NULL. This is to avoid temporary
* overlap of blocks.
*/
if (!ex1 && allocated > max_blocks)
ex2->ee_len = cpu_to_le16(max_blocks);
/* ex3: to ee_block + ee_len : uninitialised */
if (allocated > max_blocks) {
unsigned int newdepth;
ex3 = &newex;
ex3->ee_block = cpu_to_le32(iblock + max_blocks);
ext4_ext_store_pblock(ex3, newblock + max_blocks);
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3);
if (err)
goto out;
/*
* The depth, and hence eh & ex might change
* as part of the insert above.
*/
newdepth = ext_depth(inode);
if (newdepth != depth) {
depth = newdepth;
path = ext4_ext_find_extent(inode, iblock, NULL);
if (IS_ERR(path)) {
err = PTR_ERR(path);
path = NULL;
goto out;
}
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
if (ex2 != &newex)
ex2 = ex;
}
allocated = max_blocks;
}
/*
* If there was a change of depth as part of the
* insertion of ex3 above, we need to update the length
* of the ex1 extent again here
*/
if (ex1 && ex1 != ex) {
ex1 = ex;
ex1->ee_len = cpu_to_le16(iblock - ee_block);
ext4_ext_mark_uninitialized(ex1);
ex2 = &newex;
}
/* ex2: iblock to iblock + maxblocks-1 : initialised */
ex2->ee_block = cpu_to_le32(iblock);
ex2->ee_start = cpu_to_le32(newblock);
ext4_ext_store_pblock(ex2, newblock);
ex2->ee_len = cpu_to_le16(allocated);
if (ex2 != ex)
goto insert;
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
/*
* New (initialized) extent starts from the first block
* in the current extent. i.e., ex2 == ex
* We have to see if it can be merged with the extent
* on the left.
*/
if (ex2 > EXT_FIRST_EXTENT(eh)) {
/*
* To merge left, pass "ex2 - 1" to try_to_merge(),
* since it merges towards right _only_.
*/
ret = ext4_ext_try_to_merge(inode, path, ex2 - 1);
if (ret) {
err = ext4_ext_correct_indexes(handle, inode, path);
if (err)
goto out;
depth = ext_depth(inode);
ex2--;
}
}
/*
* Try to Merge towards right. This might be required
* only when the whole extent is being written to.
* i.e. ex2 == ex and ex3 == NULL.
*/
if (!ex3) {
ret = ext4_ext_try_to_merge(inode, path, ex2);
if (ret) {
err = ext4_ext_correct_indexes(handle, inode, path);
if (err)
goto out;
}
}
/* Mark modified extent as dirty */
err = ext4_ext_dirty(handle, inode, path + depth);
goto out;
insert:
err = ext4_ext_insert_extent(handle, inode, path, &newex);
out:
return err ? err : allocated;
}
int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t iblock,
unsigned long max_blocks, struct buffer_head *bh_result,
int create, int extend_disksize)
{
struct ext4_ext_path *path = NULL;
struct ext4_extent_header *eh;
struct ext4_extent newex, *ex;
ext4_fsblk_t goal, newblock;
int err = 0, depth;
int err = 0, depth, ret;
unsigned long allocated = 0;
__clear_bit(BH_New, &bh_result->b_state);
......@@ -1998,8 +2249,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
if (goal) {
if (goal == EXT4_EXT_CACHE_GAP) {
if (!create) {
/* block isn't allocated yet and
* user doesn't want to allocate it */
/*
* block isn't allocated yet and
* user doesn't want to allocate it
*/
goto out2;
}
/* we should allocate requested block */
......@@ -2033,21 +2286,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* this is why assert can't be put in ext4_ext_find_extent()
*/
BUG_ON(path[depth].p_ext == NULL && depth != 0);
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
if (ex) {
unsigned long ee_block = le32_to_cpu(ex->ee_block);
ext4_fsblk_t ee_start = ext_pblock(ex);
unsigned short ee_len = le16_to_cpu(ex->ee_len);
unsigned short ee_len;
/*
* Allow future support for preallocated extents to be added
* as an RO_COMPAT feature:
* Uninitialized extents are treated as holes, except that
* we avoid (fail) allocating new blocks during a write.
* we split out initialized portions during a write.
*/
if (ee_len > EXT_MAX_LEN)
goto out2;
ee_len = ext4_ext_get_actual_len(ex);
/* if found extent covers block, simply return it */
if (iblock >= ee_block && iblock < ee_block + ee_len) {
newblock = iblock - ee_block + ee_start;
......@@ -2055,9 +2306,27 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
allocated = ee_len - (iblock - ee_block);
ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock,
ee_block, ee_len, newblock);
ext4_ext_put_in_cache(inode, ee_block, ee_len,
ee_start, EXT4_EXT_CACHE_EXTENT);
goto out;
/* Do not put uninitialized extent in the cache */
if (!ext4_ext_is_uninitialized(ex)) {
ext4_ext_put_in_cache(inode, ee_block,
ee_len, ee_start,
EXT4_EXT_CACHE_EXTENT);
goto out;
}
if (create == EXT4_CREATE_UNINITIALIZED_EXT)
goto out;
if (!create)
goto out2;
ret = ext4_ext_convert_to_initialized(handle, inode,
path, iblock,
max_blocks);
if (ret <= 0)
goto out2;
else
allocated = ret;
goto outnew;
}
}
......@@ -2066,8 +2335,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* we couldn't try to create block if create flag is zero
*/
if (!create) {
/* put just found gap into cache to speed up
* subsequent requests */
/*
* put just found gap into cache to speed up
* subsequent requests
*/
ext4_ext_put_gap_in_cache(inode, path, iblock);
goto out2;
}
......@@ -2081,6 +2352,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* allocate new block */
goal = ext4_ext_find_goal(inode, path, iblock);
/*
* See if request is beyond maximum number of blocks we can have in
* a single extent. For an initialized extent this limit is
* EXT_INIT_MAX_LEN and for an uninitialized extent this limit is
* EXT_UNINIT_MAX_LEN.
*/
if (max_blocks > EXT_INIT_MAX_LEN &&
create != EXT4_CREATE_UNINITIALIZED_EXT)
max_blocks = EXT_INIT_MAX_LEN;
else if (max_blocks > EXT_UNINIT_MAX_LEN &&
create == EXT4_CREATE_UNINITIALIZED_EXT)
max_blocks = EXT_UNINIT_MAX_LEN;
/* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
newex.ee_block = cpu_to_le32(iblock);
newex.ee_len = cpu_to_le16(max_blocks);
......@@ -2098,6 +2382,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* try to insert new extent into found leaf and return */
ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(allocated);
if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */
ext4_ext_mark_uninitialized(&newex);
err = ext4_ext_insert_extent(handle, inode, path, &newex);
if (err) {
/* free data blocks we just allocated */
......@@ -2111,10 +2397,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* previous routine could use block we allocated */
newblock = ext_pblock(&newex);
outnew:
__set_bit(BH_New, &bh_result->b_state);
ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
EXT4_EXT_CACHE_EXTENT);
/* Cache only when it is _not_ an uninitialized extent */
if (create != EXT4_CREATE_UNINITIALIZED_EXT)
ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
EXT4_EXT_CACHE_EXTENT);
out:
if (allocated > max_blocks)
allocated = max_blocks;
......@@ -2178,7 +2467,8 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
err = ext4_ext_remove_space(inode, last_block);
/* In a multi-transaction truncate, we only make the final
* transaction synchronous. */
* transaction synchronous.
*/
if (IS_SYNC(inode))
handle->h_sync = 1;
......@@ -2217,3 +2507,127 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
return needed;
}
/*
* preallocate space for a file. This implements ext4's fallocate inode
* operation, which gets called from sys_fallocate system call.
* For block-mapped files, posix_fallocate should fall back to the method
* of writing zeroes to the required new blocks (the same behavior which is
* expected for file systems which do not support fallocate() system call).
*/
long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
{
handle_t *handle;
ext4_fsblk_t block, max_blocks;
ext4_fsblk_t nblocks = 0;
int ret = 0;
int ret2 = 0;
int retries = 0;
struct buffer_head map_bh;
unsigned int credits, blkbits = inode->i_blkbits;
/*
* currently supporting (pre)allocate mode for extent-based
* files _only_
*/
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
return -EOPNOTSUPP;
/* preallocation to directories is currently not supported */
if (S_ISDIR(inode->i_mode))
return -ENODEV;
block = offset >> blkbits;
max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
- block;
/*
* credits to insert 1 extent into extent tree + buffers to be able to
* modify 1 super block, 1 block bitmap and 1 group descriptor.
*/
credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
retry:
while (ret >= 0 && ret < max_blocks) {
block = block + ret;
max_blocks = max_blocks - ret;
handle = ext4_journal_start(inode, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
break;
}
ret = ext4_ext_get_blocks(handle, inode, block,
max_blocks, &map_bh,
EXT4_CREATE_UNINITIALIZED_EXT, 0);
WARN_ON(!ret);
if (!ret) {
ext4_error(inode->i_sb, "ext4_fallocate",
"ext4_ext_get_blocks returned 0! inode#%lu"
", block=%llu, max_blocks=%llu",
inode->i_ino, block, max_blocks);
ret = -EIO;
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
break;
}
if (ret > 0) {
/* check wrap through sign-bit/zero here */
if ((block + ret) < 0 || (block + ret) < block) {
ret = -EIO;
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
break;
}
if (buffer_new(&map_bh) && ((block + ret) >
(EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
>> blkbits)))
nblocks = nblocks + ret;
}
/* Update ctime if new blocks get allocated */
if (nblocks) {
struct timespec now;
now = current_fs_time(inode->i_sb);
if (!timespec_equal(&inode->i_ctime, &now))
inode->i_ctime = now;
}
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
if (ret2)
break;
}
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
/*
* Time to update the file size.
* Update only when preallocation was requested beyond the file size.
*/
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
(offset + len) > i_size_read(inode)) {
if (ret > 0) {
/*
* if no error, we assume preallocation succeeded
* completely
*/
mutex_lock(&inode->i_mutex);
i_size_write(inode, offset + len);
EXT4_I(inode)->i_disksize = i_size_read(inode);
mutex_unlock(&inode->i_mutex);
} else if (ret < 0 && nblocks) {
/* Handle partial allocation scenario */
loff_t newsize;
mutex_lock(&inode->i_mutex);
newsize = (nblocks << blkbits) + i_size_read(inode);
i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
EXT4_I(inode)->i_disksize = i_size_read(inode);
mutex_unlock(&inode->i_mutex);
}
}
return ret > 0 ? ret2 : ret;
}
......@@ -134,5 +134,6 @@ const struct inode_operations ext4_file_inode_operations = {
.removexattr = generic_removexattr,
#endif
.permission = ext4_permission,
.fallocate = ext4_fallocate,
};
......@@ -563,7 +563,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
inode->i_ino = ino;
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
ext4_current_time(inode);
memset(ei->i_data, 0, sizeof(ei->i_data));
ei->i_dir_start_lookup = 0;
......@@ -595,9 +596,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
spin_unlock(&sbi->s_next_gen_lock);
ei->i_state = EXT4_STATE_NEW;
ei->i_extra_isize =
(EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ?
sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0;
ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
......
......@@ -726,7 +726,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
/* We are done with atomic stuff, now do the rest of housekeeping */
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
/* had we spliced it onto indirect block? */
......@@ -1766,7 +1766,6 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
struct inode *inode = mapping->host;
struct buffer_head *bh;
int err = 0;
void *kaddr;
blocksize = inode->i_sb->s_blocksize;
length = blocksize - (offset & (blocksize - 1));
......@@ -1778,10 +1777,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
*/
if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
ext4_should_writeback_data(inode) && PageUptodate(page)) {
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + offset, 0, length);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
zero_user_page(page, offset, length, KM_USER0);
set_page_dirty(page);
goto unlock;
}
......@@ -1834,10 +1830,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
goto unlock;
}
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + offset, 0, length);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
zero_user_page(page, offset, length, KM_USER0);
BUFFER_TRACE(bh, "zeroed end of block");
......@@ -2375,7 +2368,7 @@ void ext4_truncate(struct inode *inode)
ext4_discard_reservation(inode);
mutex_unlock(&ei->truncate_mutex);
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
/*
......@@ -2583,6 +2576,25 @@ void ext4_set_inode_flags(struct inode *inode)
inode->i_flags |= S_DIRSYNC;
}
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
void ext4_get_inode_flags(struct ext4_inode_info *ei)
{
unsigned int flags = ei->vfs_inode.i_flags;
ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
if (flags & S_SYNC)
ei->i_flags |= EXT4_SYNC_FL;
if (flags & S_APPEND)
ei->i_flags |= EXT4_APPEND_FL;
if (flags & S_IMMUTABLE)
ei->i_flags |= EXT4_IMMUTABLE_FL;
if (flags & S_NOATIME)
ei->i_flags |= EXT4_NOATIME_FL;
if (flags & S_DIRSYNC)
ei->i_flags |= EXT4_DIRSYNC_FL;
}
void ext4_read_inode(struct inode * inode)
{
struct ext4_iloc iloc;
......@@ -2610,10 +2622,6 @@ void ext4_read_inode(struct inode * inode)
}
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
inode->i_size = le32_to_cpu(raw_inode->i_size);
inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
ei->i_state = 0;
ei->i_dir_start_lookup = 0;
......@@ -2691,6 +2699,11 @@ void ext4_read_inode(struct inode * inode)
} else
ei->i_extra_isize = 0;
EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
......@@ -2744,6 +2757,7 @@ static int ext4_do_update_inode(handle_t *handle,
if (ei->i_state & EXT4_STATE_NEW)
memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
ext4_get_inode_flags(ei);
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
if(!(test_opt(inode->i_sb, NO_UID32))) {
raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
......@@ -2771,9 +2785,12 @@ static int ext4_do_update_inode(handle_t *handle,
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le32(ei->i_disksize);
raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags);
......@@ -3081,6 +3098,39 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
return err;
}
/*
* Expand an inode by new_extra_isize bytes.
* Returns 0 on success or negative error number on failure.
*/
int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize,
struct ext4_iloc iloc, handle_t *handle)
{
struct ext4_inode *raw_inode;
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_entry *entry;
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
return 0;
raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
/* No extended attributes present */
if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) ||
header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
new_extra_isize);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
return 0;
}
/* try to expand with EAs present */
return ext4_expand_extra_isize_ea(inode, new_extra_isize,
raw_inode, handle);
}
/*
* What we do here is to mark the in-core inode as clean with respect to inode
* dirtiness (it may still be data-dirty).
......@@ -3105,10 +3155,38 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
{
struct ext4_iloc iloc;
int err;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
static unsigned int mnt_count;
int err, ret;
might_sleep();
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
!(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) {
/*
* We need extra buffer credits since we may write into EA block
* with this same handle. If journal_extend fails, then it will
* only result in a minor loss of functionality for that inode.
* If this is felt to be critical, then e2fsck should be run to
* force a large enough s_min_extra_isize.
*/
if ((jbd2_journal_extend(handle,
EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
ret = ext4_expand_extra_isize(inode,
sbi->s_want_extra_isize,
iloc, handle);
if (ret) {
EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
if (mnt_count != sbi->s_es->s_mnt_count) {
ext4_warning(inode->i_sb, __FUNCTION__,
"Unable to expand inode %lu. Delete"
" some EAs or run e2fsck.",
inode->i_ino);
mnt_count = sbi->s_es->s_mnt_count;
}
}
}
}
if (!err)
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
return err;
......@@ -3197,7 +3275,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
*/
journal = EXT4_JOURNAL(inode);
if (is_journal_aborted(journal) || IS_RDONLY(inode))
if (is_journal_aborted(journal))
return -EROFS;
jbd2_journal_lock_updates(journal);
......
......@@ -28,6 +28,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
switch (cmd) {
case EXT4_IOC_GETFLAGS:
ext4_get_inode_flags(ei);
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
return put_user(flags, (int __user *) arg);
case EXT4_IOC_SETFLAGS: {
......@@ -96,7 +97,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
ei->i_flags = flags;
ext4_set_inode_flags(inode);
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
flags_err:
......@@ -133,14 +134,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
return PTR_ERR(handle);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err == 0) {
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
inode->i_generation = generation;
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}
ext4_journal_stop(handle);
return err;
}
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
case EXT4_IOC_WAIT_FOR_READONLY:
/*
* This is racy - by the time we're woken up and running,
......@@ -282,7 +283,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC32_SETVERSION_OLD:
cmd = EXT4_IOC_SETVERSION_OLD;
break;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
case EXT4_IOC32_WAIT_FOR_READONLY:
cmd = EXT4_IOC_WAIT_FOR_READONLY;
break;
......
......@@ -1295,7 +1295,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
* happen is that the times are slightly out of date
* and/or different from the directory change time.
*/
dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
dir->i_version++;
ext4_mark_inode_dirty(handle, dir);
......@@ -1629,6 +1629,35 @@ static int ext4_delete_entry (handle_t *handle,
return -ENOENT;
}
/*
* DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
* since this indicates that nlinks count was previously 1.
*/
static void ext4_inc_count(handle_t *handle, struct inode *inode)
{
inc_nlink(inode);
if (is_dx(inode) && inode->i_nlink > 1) {
/* limit is 16-bit i_links_count */
if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
inode->i_nlink = 1;
EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
}
}
}
/*
* If a directory had nlink == 1, then we should let it be 1. This indicates
* directory has >EXT4_LINK_MAX subdirs.
*/
static void ext4_dec_count(handle_t *handle, struct inode *inode)
{
drop_nlink(inode);
if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0)
inc_nlink(inode);
}
static int ext4_add_nondir(handle_t *handle,
struct dentry *dentry, struct inode *inode)
{
......@@ -1725,7 +1754,7 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
struct ext4_dir_entry_2 * de;
int err, retries = 0;
if (dir->i_nlink >= EXT4_LINK_MAX)
if (EXT4_DIR_LINK_MAX(dir))
return -EMLINK;
retry:
......@@ -1748,7 +1777,7 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
dir_block = ext4_bread (handle, inode, 0, 1, &err);
if (!dir_block) {
drop_nlink(inode); /* is this nlink == 0? */
ext4_dec_count(handle, inode); /* is this nlink == 0? */
ext4_mark_inode_dirty(handle, inode);
iput (inode);
goto out_stop;
......@@ -1780,7 +1809,7 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
iput (inode);
goto out_stop;
}
inc_nlink(dir);
ext4_inc_count(handle, dir);
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
d_instantiate(dentry, inode);
......@@ -2045,9 +2074,9 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto end_rmdir;
if (inode->i_nlink != 2)
if (!EXT4_DIR_LINK_EMPTY(inode))
ext4_warning (inode->i_sb, "ext4_rmdir",
"empty directory has nlink!=2 (%d)",
"empty directory has too many links (%d)",
inode->i_nlink);
inode->i_version++;
clear_nlink(inode);
......@@ -2056,9 +2085,9 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
* recovery. */
inode->i_size = 0;
ext4_orphan_add(handle, inode);
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
drop_nlink(dir);
ext4_dec_count(handle, dir);
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
......@@ -2106,13 +2135,13 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto end_unlink;
dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
drop_nlink(inode);
ext4_dec_count(handle, inode);
if (!inode->i_nlink)
ext4_orphan_add(handle, inode);
inode->i_ctime = dir->i_ctime;
inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
retval = 0;
......@@ -2159,7 +2188,7 @@ static int ext4_symlink (struct inode * dir,
err = __page_symlink(inode, symname, l,
mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
if (err) {
drop_nlink(inode);
ext4_dec_count(handle, inode);
ext4_mark_inode_dirty(handle, inode);
iput (inode);
goto out_stop;
......@@ -2185,8 +2214,9 @@ static int ext4_link (struct dentry * old_dentry,
struct inode *inode = old_dentry->d_inode;
int err, retries = 0;
if (inode->i_nlink >= EXT4_LINK_MAX)
if (EXT4_DIR_LINK_MAX(inode))
return -EMLINK;
/*
* Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
* otherwise has the potential to corrupt the orphan inode list.
......@@ -2203,8 +2233,8 @@ static int ext4_link (struct dentry * old_dentry,
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
inode->i_ctime = CURRENT_TIME_SEC;
inc_nlink(inode);
inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
atomic_inc(&inode->i_count);
err = ext4_add_nondir(handle, dentry, inode);
......@@ -2305,7 +2335,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
old_inode->i_ctime = CURRENT_TIME_SEC;
old_inode->i_ctime = ext4_current_time(old_inode);
ext4_mark_inode_dirty(handle, old_inode);
/*
......@@ -2337,10 +2367,10 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
}
if (new_inode) {
drop_nlink(new_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
ext4_dec_count(handle, new_inode);
new_inode->i_ctime = ext4_current_time(new_inode);
}
old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
ext4_update_dx_flag(old_dir);
if (dir_bh) {
BUFFER_TRACE(dir_bh, "get_write_access");
......@@ -2348,11 +2378,13 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata");
ext4_journal_dirty_metadata(handle, dir_bh);
drop_nlink(old_dir);
ext4_dec_count(handle, old_dir);
if (new_inode) {
drop_nlink(new_inode);
/* checked empty_dir above, can't have another parent,
* ext3_dec_count() won't work for many-linked dirs */
new_inode->i_nlink = 0;
} else {
inc_nlink(new_dir);
ext4_inc_count(handle, new_dir);
ext4_update_dx_flag(new_dir);
ext4_mark_inode_dirty(handle, new_dir);
}
......
......@@ -36,6 +36,7 @@
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include <linux/log2.h>
#include <asm/uaccess.h>
......@@ -734,7 +735,7 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
Opt_grpquota, Opt_extents,
Opt_grpquota, Opt_extents, Opt_noextents,
};
static match_table_t tokens = {
......@@ -785,6 +786,7 @@ static match_table_t tokens = {
{Opt_usrquota, "usrquota"},
{Opt_barrier, "barrier=%u"},
{Opt_extents, "extents"},
{Opt_noextents, "noextents"},
{Opt_err, NULL},
{Opt_resize, "resize"},
};
......@@ -1120,6 +1122,9 @@ static int parse_options (char *options, struct super_block *sb,
case Opt_extents:
set_opt (sbi->s_mount_opt, EXTENTS);
break;
case Opt_noextents:
clear_opt (sbi->s_mount_opt, EXTENTS);
break;
default:
printk (KERN_ERR
"EXT4-fs: Unrecognized mount option \"%s\" "
......@@ -1551,6 +1556,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, RESERVATION);
/*
* turn on extents feature by default in ext4 filesystem
* User -o noextents to turn it off
*/
set_opt(sbi->s_mount_opt, EXTENTS);
if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
NULL, 0))
goto failed_mount;
......@@ -1634,13 +1645,15 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
(sbi->s_inode_size & (sbi->s_inode_size - 1)) ||
(!is_power_of_2(sbi->s_inode_size)) ||
(sbi->s_inode_size > blocksize)) {
printk (KERN_ERR
"EXT4-fs: unsupported inode size: %d\n",
sbi->s_inode_size);
goto failed_mount;
}
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
}
sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
le32_to_cpu(es->s_log_frag_size);
......@@ -1803,6 +1816,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount3;
}
if (ext4_blocks_count(es) > 0xffffffffULL &&
!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_64BIT)) {
printk(KERN_ERR "ext4: Failed to set 64-bit journal feature\n");
goto failed_mount4;
}
/* We have now updated the journal if required, so we can
* validate the data journaling mode. */
switch (test_opt(sb, DATA_FLAGS)) {
......@@ -1857,6 +1877,32 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
}
ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
/* determine the minimum size of new large inodes, if present */
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
if (sbi->s_want_extra_isize <
le16_to_cpu(es->s_want_extra_isize))
sbi->s_want_extra_isize =
le16_to_cpu(es->s_want_extra_isize);
if (sbi->s_want_extra_isize <
le16_to_cpu(es->s_min_extra_isize))
sbi->s_want_extra_isize =
le16_to_cpu(es->s_min_extra_isize);
}
}
/* Check if enough inode space is available */
if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
sbi->s_inode_size) {
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
printk(KERN_INFO "EXT4-fs: required extra inode space not"
"available.\n");
}
/*
* akpm: core read_super() calls in here with the superblock locked.
* That deadlocks, because orphan cleanup needs to lock the superblock
......
......@@ -66,13 +66,6 @@
#define BFIRST(bh) ENTRY(BHDR(bh)+1)
#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
#define IHDR(inode, raw_inode) \
((struct ext4_xattr_ibody_header *) \
((void *)raw_inode + \
EXT4_GOOD_OLD_INODE_SIZE + \
EXT4_I(inode)->i_extra_isize))
#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
#ifdef EXT4_XATTR_DEBUG
# define ea_idebug(inode, f...) do { \
printk(KERN_DEBUG "inode %s:%lu: ", \
......@@ -508,6 +501,24 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
return;
}
/*
* Find the available free space for EAs. This also returns the total number of
* bytes used by EA entries.
*/
static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
size_t *min_offs, void *base, int *total)
{
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
*total += EXT4_XATTR_LEN(last->e_name_len);
if (!last->e_value_block && last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < *min_offs)
*min_offs = offs;
}
}
return (*min_offs - ((void *)last - base) - sizeof(__u32));
}
struct ext4_xattr_info {
int name_index;
const char *name;
......@@ -1013,7 +1024,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
}
if (!error) {
ext4_xattr_update_super_block(handle, inode->i_sb);
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_ctime = ext4_current_time(inode);
if (!value)
EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND;
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
/*
* The bh is consumed by ext4_mark_iloc_dirty, even with
......@@ -1066,6 +1079,253 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
return error;
}
/*
* Shift the EA entries in the inode to create space for the increased
* i_extra_isize.
*/
static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
int value_offs_shift, void *to,
void *from, size_t n, int blocksize)
{
struct ext4_xattr_entry *last = entry;
int new_offs;
/* Adjust the value offsets of the entries */
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
if (!last->e_value_block && last->e_value_size) {
new_offs = le16_to_cpu(last->e_value_offs) +
value_offs_shift;
BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
> blocksize);
last->e_value_offs = cpu_to_le16(new_offs);
}
}
/* Shift the entries by n bytes */
memmove(to, from, n);
}
/*
* Expand an inode by new_extra_isize bytes when EAs are present.
* Returns 0 on success or negative error number on failure.
*/
int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle)
{
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_entry *entry, *last, *first;
struct buffer_head *bh = NULL;
struct ext4_xattr_ibody_find *is = NULL;
struct ext4_xattr_block_find *bs = NULL;
char *buffer = NULL, *b_entry_name = NULL;
size_t min_offs, free;
int total_ino, total_blk;
void *base, *start, *end;
int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
int s_min_extra_isize = EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize;
down_write(&EXT4_I(inode)->xattr_sem);
retry:
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
up_write(&EXT4_I(inode)->xattr_sem);
return 0;
}
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
/*
* Check if enough free space is available in the inode to shift the
* entries ahead by new_extra_isize.
*/
base = start = entry;
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
min_offs = end - base;
last = entry;
total_ino = sizeof(struct ext4_xattr_ibody_header);
free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
if (free >= new_extra_isize) {
entry = IFIRST(header);
ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
- new_extra_isize, (void *)raw_inode +
EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
(void *)header, total_ino,
inode->i_sb->s_blocksize);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
error = 0;
goto cleanup;
}
/*
* Enough free space isn't available in the inode, check if
* EA block can hold new_extra_isize bytes.
*/
if (EXT4_I(inode)->i_file_acl) {
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
error = -EIO;
if (!bh)
goto cleanup;
if (ext4_xattr_check_block(bh)) {
ext4_error(inode->i_sb, __FUNCTION__,
"inode %lu: bad block %llu", inode->i_ino,
EXT4_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
}
base = BHDR(bh);
first = BFIRST(bh);
end = bh->b_data + bh->b_size;
min_offs = end - base;
free = ext4_xattr_free_space(first, &min_offs, base,
&total_blk);
if (free < new_extra_isize) {
if (!tried_min_extra_isize && s_min_extra_isize) {
tried_min_extra_isize++;
new_extra_isize = s_min_extra_isize;
brelse(bh);
goto retry;
}
error = -1;
goto cleanup;
}
} else {
free = inode->i_sb->s_blocksize;
}
while (new_extra_isize > 0) {
size_t offs, size, entry_size;
struct ext4_xattr_entry *small_entry = NULL;
struct ext4_xattr_info i = {
.value = NULL,
.value_len = 0,
};
unsigned int total_size; /* EA entry size + value size */
unsigned int shift_bytes; /* No. of bytes to shift EAs by? */
unsigned int min_total_size = ~0U;
is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
if (!is || !bs) {
error = -ENOMEM;
goto cleanup;
}
is->s.not_found = -ENODATA;
bs->s.not_found = -ENODATA;
is->iloc.bh = NULL;
bs->bh = NULL;
last = IFIRST(header);
/* Find the entry best suited to be pushed into EA block */
entry = NULL;
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
total_size =
EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
EXT4_XATTR_LEN(last->e_name_len);
if (total_size <= free && total_size < min_total_size) {
if (total_size < new_extra_isize) {
small_entry = last;
} else {
entry = last;
min_total_size = total_size;
}
}
}
if (entry == NULL) {
if (small_entry) {
entry = small_entry;
} else {
if (!tried_min_extra_isize &&
s_min_extra_isize) {
tried_min_extra_isize++;
new_extra_isize = s_min_extra_isize;
goto retry;
}
error = -1;
goto cleanup;
}
}
offs = le16_to_cpu(entry->e_value_offs);
size = le32_to_cpu(entry->e_value_size);
entry_size = EXT4_XATTR_LEN(entry->e_name_len);
i.name_index = entry->e_name_index,
buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS);
b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
if (!buffer || !b_entry_name) {
error = -ENOMEM;
goto cleanup;
}
/* Save the entry name and the entry value */
memcpy(buffer, (void *)IFIRST(header) + offs,
EXT4_XATTR_SIZE(size));
memcpy(b_entry_name, entry->e_name, entry->e_name_len);
b_entry_name[entry->e_name_len] = '\0';
i.name = b_entry_name;
error = ext4_get_inode_loc(inode, &is->iloc);
if (error)
goto cleanup;
error = ext4_xattr_ibody_find(inode, &i, is);
if (error)
goto cleanup;
/* Remove the chosen entry from the inode */
error = ext4_xattr_ibody_set(handle, inode, &i, is);
entry = IFIRST(header);
if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
shift_bytes = new_extra_isize;
else
shift_bytes = entry_size + size;
/* Adjust the offsets and shift the remaining entries ahead */
ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
shift_bytes, (void *)raw_inode +
EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
(void *)header, total_ino - entry_size,
inode->i_sb->s_blocksize);
extra_isize += shift_bytes;
new_extra_isize -= shift_bytes;
EXT4_I(inode)->i_extra_isize = extra_isize;
i.name = b_entry_name;
i.value = buffer;
i.value_len = cpu_to_le32(size);
error = ext4_xattr_block_find(inode, &i, bs);
if (error)
goto cleanup;
/* Add entry which was removed from the inode into the block */
error = ext4_xattr_block_set(handle, inode, &i, bs);
if (error)
goto cleanup;
kfree(b_entry_name);
kfree(buffer);
brelse(is->iloc.bh);
kfree(is);
kfree(bs);
}
brelse(bh);
up_write(&EXT4_I(inode)->xattr_sem);
return 0;
cleanup:
kfree(b_entry_name);
kfree(buffer);
if (is)
brelse(is->iloc.bh);
kfree(is);
kfree(bs);
brelse(bh);
up_write(&EXT4_I(inode)->xattr_sem);
return error;
}
/*
* ext4_xattr_delete_inode()
*
......
......@@ -56,6 +56,13 @@ struct ext4_xattr_entry {
#define EXT4_XATTR_SIZE(size) \
(((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
#define IHDR(inode, raw_inode) \
((struct ext4_xattr_ibody_header *) \
((void *)raw_inode + \
EXT4_GOOD_OLD_INODE_SIZE + \
EXT4_I(inode)->i_extra_isize))
#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
# ifdef CONFIG_EXT4DEV_FS_XATTR
extern struct xattr_handler ext4_xattr_user_handler;
......@@ -74,6 +81,9 @@ extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *,
extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
extern void ext4_xattr_put_super(struct super_block *);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
extern int init_ext4_xattr(void);
extern void exit_ext4_xattr(void);
......@@ -129,6 +139,13 @@ exit_ext4_xattr(void)
{
}
static inline int
ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle)
{
return -EOPNOTSUPP;
}
#define ext4_xattr_handlers NULL
# endif /* CONFIG_EXT4DEV_FS_XATTR */
......
......@@ -35,6 +35,7 @@
#include <linux/kthread.h>
#include <linux/poison.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <asm/uaccess.h>
#include <asm/page.h>
......@@ -528,7 +529,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
{
int err = 0;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
spin_lock(&journal->j_state_lock);
if (!tid_geq(journal->j_commit_request, tid)) {
printk(KERN_EMERG
......@@ -1709,7 +1710,7 @@ void jbd2_slab_free(void *ptr, size_t size)
* Journal_head storage management
*/
static struct kmem_cache *jbd2_journal_head_cache;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
static atomic_t nr_journal_heads = ATOMIC_INIT(0);
#endif
......@@ -1747,7 +1748,7 @@ static struct journal_head *journal_alloc_journal_head(void)
struct journal_head *ret;
static unsigned long last_warning;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
atomic_inc(&nr_journal_heads);
#endif
ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS);
......@@ -1768,7 +1769,7 @@ static struct journal_head *journal_alloc_journal_head(void)
static void journal_free_journal_head(struct journal_head *jh)
{
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
atomic_dec(&nr_journal_heads);
memset(jh, JBD_POISON_FREE, sizeof(*jh));
#endif
......@@ -1951,64 +1952,50 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
}
/*
* /proc tunables
* debugfs tunables
*/
#if defined(CONFIG_JBD_DEBUG)
int jbd2_journal_enable_debug;
#if defined(CONFIG_JBD2_DEBUG)
u8 jbd2_journal_enable_debug;
EXPORT_SYMBOL(jbd2_journal_enable_debug);
#endif
#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_DEBUG_FS)
static struct proc_dir_entry *proc_jbd_debug;
#define JBD2_DEBUG_NAME "jbd2-debug"
static int read_jbd_debug(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
int ret;
struct dentry *jbd2_debugfs_dir, *jbd2_debug;
ret = sprintf(page + off, "%d\n", jbd2_journal_enable_debug);
*eof = 1;
return ret;
static void __init jbd2_create_debugfs_entry(void)
{
jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL);
if (jbd2_debugfs_dir)
jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME, S_IRUGO,
jbd2_debugfs_dir,
&jbd2_journal_enable_debug);
}
static int write_jbd_debug(struct file *file, const char __user *buffer,
unsigned long count, void *data)
static void __exit jbd2_remove_debugfs_entry(void)
{
char buf[32];
if (count > ARRAY_SIZE(buf) - 1)
count = ARRAY_SIZE(buf) - 1;
if (copy_from_user(buf, buffer, count))
return -EFAULT;
buf[ARRAY_SIZE(buf) - 1] = '\0';
jbd2_journal_enable_debug = simple_strtoul(buf, NULL, 10);
return count;
if (jbd2_debug)
debugfs_remove(jbd2_debug);
if (jbd2_debugfs_dir)
debugfs_remove(jbd2_debugfs_dir);
}
#define JBD_PROC_NAME "sys/fs/jbd2-debug"
#else
static void __init create_jbd_proc_entry(void)
static void __init jbd2_create_debugfs_entry(void)
{
proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
if (proc_jbd_debug) {
/* Why is this so hard? */
proc_jbd_debug->read_proc = read_jbd_debug;
proc_jbd_debug->write_proc = write_jbd_debug;
}
do {
} while (0);
}
static void __exit jbd2_remove_jbd_proc_entry(void)
static void __exit jbd2_remove_debugfs_entry(void)
{
if (proc_jbd_debug)
remove_proc_entry(JBD_PROC_NAME, NULL);
do {
} while (0);
}
#else
#define create_jbd_proc_entry() do {} while (0)
#define jbd2_remove_jbd_proc_entry() do {} while (0)
#endif
struct kmem_cache *jbd2_handle_cache;
......@@ -2067,18 +2054,18 @@ static int __init journal_init(void)
ret = journal_init_caches();
if (ret != 0)
jbd2_journal_destroy_caches();
create_jbd_proc_entry();
jbd2_create_debugfs_entry();
return ret;
}
static void __exit journal_exit(void)
{
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
int n = atomic_read(&nr_journal_heads);
if (n)
printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
#endif
jbd2_remove_jbd_proc_entry();
jbd2_remove_debugfs_entry();
jbd2_journal_destroy_caches();
}
......
......@@ -295,7 +295,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
printk(KERN_ERR "JBD: error %d scanning journal\n", err);
++journal->j_transaction_sequence;
} else {
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
#endif
jbd_debug(0,
......
......@@ -26,6 +26,7 @@
#include <linux/syscalls.h>
#include <linux/rcupdate.h>
#include <linux/audit.h>
#include <linux/falloc.h>
int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
......@@ -352,6 +353,64 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
}
#endif
asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len)
{
struct file *file;
struct inode *inode;
long ret = -EINVAL;
if (offset < 0 || len <= 0)
goto out;
/* Return error if mode is not supported */
ret = -EOPNOTSUPP;
if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
goto out;
ret = -EBADF;
file = fget(fd);
if (!file)
goto out;
if (!(file->f_mode & FMODE_WRITE))
goto out_fput;
/*
* Revalidate the write permissions, in case security policy has
* changed since the files were opened.
*/
ret = security_file_permission(file, MAY_WRITE);
if (ret)
goto out_fput;
inode = file->f_path.dentry->d_inode;
ret = -ESPIPE;
if (S_ISFIFO(inode->i_mode))
goto out_fput;
ret = -ENODEV;
/*
* Let individual file system decide if it supports preallocation
* for directories or not.
*/
if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
goto out_fput;
ret = -EFBIG;
/* Check for wrap through zero too */
if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
goto out_fput;
if (inode->i_op && inode->i_op->fallocate)
ret = inode->i_op->fallocate(inode, mode, offset, len);
else
ret = -ENOSYS;
out_fput:
fput(file);
out:
return ret;
}
/*
* access() needs to use the real uid/gid, not the effective uid/gid.
* We do this by temporarily clearing all FS-related capabilities and
......
......@@ -329,10 +329,11 @@
#define __NR_signalfd 321
#define __NR_timerfd 322
#define __NR_eventfd 323
#define __NR_fallocate 324
#ifdef __KERNEL__
#define NR_syscalls 324
#define NR_syscalls 325
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
......
......@@ -308,6 +308,7 @@ COMPAT_SYS_SPU(move_pages)
SYSCALL_SPU(getcpu)
COMPAT_SYS(epoll_pwait)
COMPAT_SYS_SPU(utimensat)
COMPAT_SYS(fallocate)
COMPAT_SYS_SPU(signalfd)
COMPAT_SYS_SPU(timerfd)
SYSCALL_SPU(eventfd)
......
......@@ -331,10 +331,11 @@
#define __NR_timerfd 306
#define __NR_eventfd 307
#define __NR_sync_file_range2 308
#define __NR_fallocate 309
#ifdef __KERNEL__
#define __NR_syscalls 309
#define __NR_syscalls 310
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
......
......@@ -630,6 +630,8 @@ __SYSCALL(__NR_signalfd, sys_signalfd)
__SYSCALL(__NR_timerfd, sys_timerfd)
#define __NR_eventfd 284
__SYSCALL(__NR_eventfd, sys_eventfd)
#define __NR_fallocate 285
__SYSCALL(__NR_fallocate, sys_fallocate)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
......
......@@ -71,7 +71,7 @@
/*
* Maximal count of links to a file
*/
#define EXT4_LINK_MAX 32000
#define EXT4_LINK_MAX 65000
/*
* Macro-instructions used to manage several block sizes
......@@ -102,6 +102,7 @@
EXT4_GOOD_OLD_FIRST_INO : \
(s)->s_first_ino)
#endif
#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
/*
* Macro-instructions used to manage fragments
......@@ -201,6 +202,7 @@ struct ext4_group_desc
#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
/* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input {
......@@ -225,6 +227,11 @@ struct ext4_new_group_data {
__u32 free_blocks_count;
};
/*
* Following is used by preallocation code to tell get_blocks() that we
* want uninitialzed extents.
*/
#define EXT4_CREATE_UNINITIALIZED_EXT 2
/*
* ioctl commands
......@@ -237,7 +244,7 @@ struct ext4_new_group_data {
#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
#endif
#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
......@@ -253,7 +260,7 @@ struct ext4_new_group_data {
#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
#endif
#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
......@@ -282,7 +289,7 @@ struct ext4_inode {
__le16 i_uid; /* Low 16 bits of Owner Uid */
__le32 i_size; /* Size in bytes */
__le32 i_atime; /* Access time */
__le32 i_ctime; /* Creation time */
__le32 i_ctime; /* Inode Change time */
__le32 i_mtime; /* Modification time */
__le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */
......@@ -331,10 +338,85 @@ struct ext4_inode {
} osd2; /* OS dependent 2 */
__le16 i_extra_isize;
__le16 i_pad1;
__le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
__le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
__le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
__le32 i_crtime; /* File Creation time */
__le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
};
#define i_size_high i_dir_acl
#define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
/*
* Extended fields will fit into an inode if the filesystem was formatted
* with large inodes (-I 256 or larger) and there are not currently any EAs
* consuming all of the available space. For new inodes we always reserve
* enough space for the kernel's known extended fields, but for inodes
* created with an old kernel this might not have been the case. None of
* the extended inode fields is critical for correct filesystem operation.
* This macro checks if a certain field fits in the inode. Note that
* inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
*/
#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
((offsetof(typeof(*ext4_inode), field) + \
sizeof((ext4_inode)->field)) \
<= (EXT4_GOOD_OLD_INODE_SIZE + \
(einode)->i_extra_isize)) \
static inline __le32 ext4_encode_extra_time(struct timespec *time)
{
return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
time->tv_sec >> 32 : 0) |
((time->tv_nsec << 2) & EXT4_NSEC_MASK));
}
static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
{
if (sizeof(time->tv_sec) > 4)
time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
<< 32;
time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
}
#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
do { \
(raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
(raw_inode)->xtime ## _extra = \
ext4_encode_extra_time(&(inode)->xtime); \
} while (0)
#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
(raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
(raw_inode)->xtime ## _extra = \
ext4_encode_extra_time(&(einode)->xtime); \
} while (0)
#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
do { \
(inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
ext4_decode_extra_time(&(inode)->xtime, \
raw_inode->xtime ## _extra); \
} while (0)
#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
(einode)->xtime.tv_sec = \
(signed)le32_to_cpu((raw_inode)->xtime); \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
ext4_decode_extra_time(&(einode)->xtime, \
raw_inode->xtime ## _extra); \
} while (0)
#if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1 osd1.linux1.l_i_reserved1
#define i_frag osd2.linux2.l_i_frag
......@@ -533,6 +615,13 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
return container_of(inode, struct ext4_inode_info, vfs_inode);
}
static inline struct timespec ext4_current_time(struct inode *inode)
{
return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
}
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
{
return ino == EXT4_ROOT_INO ||
......@@ -603,6 +692,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
......@@ -620,6 +711,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
EXT4_FEATURE_INCOMPAT_64BIT)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
/*
......@@ -862,6 +955,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern void ext4_truncate (struct inode *);
extern void ext4_set_inode_flags(struct inode *);
extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
......@@ -983,6 +1077,8 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
extern void ext4_ext_truncate(struct inode *, struct page *);
extern void ext4_ext_init(struct super_block *);
extern void ext4_ext_release(struct super_block *);
extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
loff_t len);
static inline int
ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
unsigned long max_blocks, struct buffer_head *bh,
......
......@@ -141,7 +141,25 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
#define EXT_MAX_BLOCK 0xffffffff
#define EXT_MAX_LEN ((1UL << 15) - 1)
/*
* EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
* initialized extent. This is 2^15 and not (2^16 - 1), since we use the
* MSB of ee_len field in the extent datastructure to signify if this
* particular extent is an initialized extent or an uninitialized (i.e.
* preallocated).
* EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
* uninitialized extent.
* If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
* uninitialized one. In other words, if MSB of ee_len is set, it is an
* uninitialized extent with only one special scenario when ee_len = 0x8000.
* In this case we can not have an uninitialized extent of zero length and
* thus we make it as a special case of initialized extent with 0x8000 length.
* This way we get better extent-to-group alignment for initialized extents.
* Hence, the maximum number of blocks we can have in an *initialized*
* extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
*/
#define EXT_INIT_MAX_LEN (1UL << 15)
#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
#define EXT_FIRST_EXTENT(__hdr__) \
......@@ -188,8 +206,31 @@ ext4_ext_invalidate_cache(struct inode *inode)
EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
}
static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
{
/* We can not have an uninitialized extent of zero length! */
BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
}
static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
{
/* Extent with ee_len of 0x8000 is treated as an initialized extent */
return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
}
static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
{
return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
le16_to_cpu(ext->ee_len) :
(le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
}
extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
extern int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *);
extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
......
......@@ -153,6 +153,11 @@ struct ext4_inode_info {
unsigned long i_ext_generation;
struct ext4_ext_cache i_cached_extent;
/*
* File creation time. Its function is same as that of
* struct timespec i_{a,c,m}time in the generic inode.
*/
struct timespec i_crtime;
};
#endif /* _LINUX_EXT4_FS_I */
......@@ -73,7 +73,7 @@ struct ext4_sb_info {
struct list_head s_orphan;
unsigned long s_commit_interval;
struct block_device *journal_bdev;
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
#endif
......@@ -81,6 +81,7 @@ struct ext4_sb_info {
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
#ifdef EXTENTS_STATS
/* ext4 extents stats */
......
#ifndef _FALLOC_H_
#define _FALLOC_H_
#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
#endif /* _FALLOC_H_ */
......@@ -1147,6 +1147,8 @@ struct inode_operations {
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
void (*truncate_range)(struct inode *, loff_t, loff_t);
long (*fallocate)(struct inode *inode, int mode, loff_t offset,
loff_t len);
};
struct seq_file;
......
......@@ -50,14 +50,14 @@
*/
#define JBD_DEFAULT_MAX_COMMIT_AGE 5
#ifdef CONFIG_JBD_DEBUG
#ifdef CONFIG_JBD2_DEBUG
/*
* Define JBD_EXPENSIVE_CHECKING to enable more expensive internal
* consistency checks. By default we don't do this unless
* CONFIG_JBD_DEBUG is on.
* CONFIG_JBD2_DEBUG is on.
*/
#define JBD_EXPENSIVE_CHECKING
extern int jbd2_journal_enable_debug;
extern u8 jbd2_journal_enable_debug;
#define jbd_debug(n, f, a...) \
do { \
......
......@@ -610,6 +610,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
const struct itimerspec __user *utmr);
asmlinkage long sys_eventfd(unsigned int count);
asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册