提交 83a21c18 编写于 作者: C Chandan Babu R

xfs: Directory's data fork extent counter can never overflow

The maximum file size that can be represented by the data fork extent counter
in the worst case occurs when all extents are 1 block in length and each block
is 1KB in size.

With XFS_MAX_EXTCNT_DATA_FORK_SMALL representing maximum extent count and with
1KB sized blocks, a file can reach up to,
(2^31) * 1KB = 2TB

This is much larger than the theoretical maximum size of a directory
i.e. XFS_DIR2_SPACE_SIZE * 3 = ~96GB.

Since a directory's inode can never overflow its data fork extent counter,
this commit removes all the overflow checks associated with
it. xfs_dinode_verify() now performs a rough check to verify if a directory's
data fork is larger than 96GB.
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
上级 52a4a148
...@@ -5147,26 +5147,6 @@ xfs_bmap_del_extent_real( ...@@ -5147,26 +5147,6 @@ xfs_bmap_del_extent_real(
* Deleting the middle of the extent. * Deleting the middle of the extent.
*/ */
/*
* For directories, -ENOSPC is returned since a directory entry
* remove operation must not fail due to low extent count
* availability. -ENOSPC will be handled by higher layers of XFS
* by letting the corresponding empty Data/Free blocks to linger
* until a future remove operation. Dabtree blocks would be
* swapped with the last block in the leaf space and then the
* new last block will be unmapped.
*
* The above logic also applies to the source directory entry of
* a rename operation.
*/
error = xfs_iext_count_may_overflow(ip, whichfork, 1);
if (error) {
ASSERT(S_ISDIR(VFS_I(ip)->i_mode) &&
whichfork == XFS_DATA_FORK);
error = -ENOSPC;
goto done;
}
old = got; old = got;
got.br_blockcount = del->br_startoff - got.br_startoff; got.br_blockcount = del->br_startoff - got.br_startoff;
......
...@@ -30,6 +30,7 @@ struct xfs_da_geometry { ...@@ -30,6 +30,7 @@ struct xfs_da_geometry {
unsigned int free_hdr_size; /* dir2 free header size */ unsigned int free_hdr_size; /* dir2 free header size */
unsigned int free_max_bests; /* # of bests entries in dir2 free */ unsigned int free_max_bests; /* # of bests entries in dir2 free */
xfs_dablk_t freeblk; /* blockno of free data v2 */ xfs_dablk_t freeblk; /* blockno of free data v2 */
xfs_extnum_t max_extents; /* Max. extents in corresponding fork */
xfs_dir2_data_aoff_t data_first_offset; xfs_dir2_data_aoff_t data_first_offset;
size_t data_entry_offset; size_t data_entry_offset;
......
...@@ -277,6 +277,7 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr) ...@@ -277,6 +277,7 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
* Directory address space divided into sections, * Directory address space divided into sections,
* spaces separated by 32GB. * spaces separated by 32GB.
*/ */
#define XFS_DIR2_MAX_SPACES 3
#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) #define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
#define XFS_DIR2_DATA_SPACE 0 #define XFS_DIR2_DATA_SPACE 0
#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) #define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
......
...@@ -150,6 +150,8 @@ xfs_da_mount( ...@@ -150,6 +150,8 @@ xfs_da_mount(
dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET); dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET);
dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) / dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) /
(uint)sizeof(xfs_da_node_entry_t); (uint)sizeof(xfs_da_node_entry_t);
dageo->max_extents = (XFS_DIR2_MAX_SPACES * XFS_DIR2_SPACE_SIZE) >>
mp->m_sb.sb_blocklog;
dageo->magicpct = (dageo->blksize * 37) / 100; dageo->magicpct = (dageo->blksize * 37) / 100;
/* set up attribute geometry - single fsb only */ /* set up attribute geometry - single fsb only */
...@@ -161,6 +163,12 @@ xfs_da_mount( ...@@ -161,6 +163,12 @@ xfs_da_mount(
dageo->node_hdr_size = mp->m_dir_geo->node_hdr_size; dageo->node_hdr_size = mp->m_dir_geo->node_hdr_size;
dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) / dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) /
(uint)sizeof(xfs_da_node_entry_t); (uint)sizeof(xfs_da_node_entry_t);
if (xfs_has_large_extent_counts(mp))
dageo->max_extents = XFS_MAX_EXTCNT_ATTR_FORK_LARGE;
else
dageo->max_extents = XFS_MAX_EXTCNT_ATTR_FORK_SMALL;
dageo->magicpct = (dageo->blksize * 37) / 100; dageo->magicpct = (dageo->blksize * 37) / 100;
return 0; return 0;
} }
......
...@@ -915,6 +915,19 @@ enum xfs_dinode_fmt { ...@@ -915,6 +915,19 @@ enum xfs_dinode_fmt {
* *
* Rounding up 47 to the nearest multiple of bits-per-byte results in 48. Hence * Rounding up 47 to the nearest multiple of bits-per-byte results in 48. Hence
* 2^48 was chosen as the maximum data fork extent count. * 2^48 was chosen as the maximum data fork extent count.
*
* The maximum file size that can be represented by the data fork extent counter
* in the worst case occurs when all extents are 1 block in length and each
* block is 1KB in size.
*
* With XFS_MAX_EXTCNT_DATA_FORK_SMALL representing maximum extent count and
* with 1KB sized blocks, a file can reach up to,
* 1KB * (2^31) = 2TB
*
* This is much larger than the theoretical maximum size of a directory
* i.e. XFS_DIR2_SPACE_SIZE * XFS_DIR2_MAX_SPACES = ~96GB.
*
* Hence, a directory inode can never overflow its data fork extent counter.
*/ */
#define XFS_MAX_EXTCNT_DATA_FORK_LARGE ((xfs_extnum_t)((1ULL << 48) - 1)) #define XFS_MAX_EXTCNT_DATA_FORK_LARGE ((xfs_extnum_t)((1ULL << 48) - 1))
#define XFS_MAX_EXTCNT_ATTR_FORK_LARGE ((xfs_extnum_t)((1ULL << 32) - 1)) #define XFS_MAX_EXTCNT_ATTR_FORK_LARGE ((xfs_extnum_t)((1ULL << 32) - 1))
......
...@@ -491,6 +491,9 @@ xfs_dinode_verify( ...@@ -491,6 +491,9 @@ xfs_dinode_verify(
if (mode && nextents + naextents > nblocks) if (mode && nextents + naextents > nblocks)
return __this_address; return __this_address;
if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
return __this_address;
if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
return __this_address; return __this_address;
......
...@@ -39,19 +39,6 @@ struct xfs_ifork { ...@@ -39,19 +39,6 @@ struct xfs_ifork {
*/ */
#define XFS_IEXT_PUNCH_HOLE_CNT (1) #define XFS_IEXT_PUNCH_HOLE_CNT (1)
/*
* Directory entry addition can cause the following,
* 1. Data block can be added/removed.
* A new extent can cause extent count to increase by 1.
* 2. Free disk block can be added/removed.
* Same behaviour as described above for Data block.
* 3. Dabtree blocks.
* XFS_DA_NODE_MAXDEPTH blocks can be added. Each of these can be new
* extents. Hence extent count can increase by XFS_DA_NODE_MAXDEPTH.
*/
#define XFS_IEXT_DIR_MANIP_CNT(mp) \
((XFS_DA_NODE_MAXDEPTH + 1 + 1) * (mp)->m_dir_geo->fsbcount)
/* /*
* Adding/removing an xattr can cause XFS_DA_NODE_MAXDEPTH extents to * Adding/removing an xattr can cause XFS_DA_NODE_MAXDEPTH extents to
* be added. One extra extent for dabtree in case a local attr is * be added. One extra extent for dabtree in case a local attr is
......
...@@ -1024,11 +1024,6 @@ xfs_create( ...@@ -1024,11 +1024,6 @@ xfs_create(
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true; unlock_dp_on_error = true;
error = xfs_iext_count_may_overflow(dp, XFS_DATA_FORK,
XFS_IEXT_DIR_MANIP_CNT(mp));
if (error)
goto out_trans_cancel;
/* /*
* A newly created regular or special file just has one directory * A newly created regular or special file just has one directory
* entry pointing to them, but a directory also the "." entry * entry pointing to them, but a directory also the "." entry
...@@ -1242,11 +1237,6 @@ xfs_link( ...@@ -1242,11 +1237,6 @@ xfs_link(
if (error) if (error)
goto std_return; goto std_return;
error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK,
XFS_IEXT_DIR_MANIP_CNT(mp));
if (error)
goto error_return;
/* /*
* If we are using project inheritance, we only allow hard link * If we are using project inheritance, we only allow hard link
* creation in our tree when the project IDs are the same; else * creation in our tree when the project IDs are the same; else
...@@ -3210,35 +3200,6 @@ xfs_rename( ...@@ -3210,35 +3200,6 @@ xfs_rename(
/* /*
* Check for expected errors before we dirty the transaction * Check for expected errors before we dirty the transaction
* so we can return an error without a transaction abort. * so we can return an error without a transaction abort.
*
* Extent count overflow check:
*
* From the perspective of src_dp, a rename operation is essentially a
* directory entry remove operation. Hence the only place where we check
* for extent count overflow for src_dp is in
* xfs_bmap_del_extent_real(). xfs_bmap_del_extent_real() returns
* -ENOSPC when it detects a possible extent count overflow and in
* response, the higher layers of directory handling code do the
* following:
* 1. Data/Free blocks: XFS lets these blocks linger until a
* future remove operation removes them.
* 2. Dabtree blocks: XFS swaps the blocks with the last block in the
* Leaf space and unmaps the last block.
*
* For target_dp, there are two cases depending on whether the
* destination directory entry exists or not.
*
* When destination directory entry does not exist (i.e. target_ip ==
* NULL), extent count overflow check is performed only when transaction
* has a non-zero sized space reservation associated with it. With a
* zero-sized space reservation, XFS allows a rename operation to
* continue only when the directory has sufficient free space in its
* data/leaf/free space blocks to hold the new entry.
*
* When destination directory entry exists (i.e. target_ip != NULL), all
* we need to do is change the inode number associated with the already
* existing entry. Hence there is no need to perform an extent count
* overflow check.
*/ */
if (target_ip == NULL) { if (target_ip == NULL) {
/* /*
...@@ -3249,12 +3210,6 @@ xfs_rename( ...@@ -3249,12 +3210,6 @@ xfs_rename(
error = xfs_dir_canenter(tp, target_dp, target_name); error = xfs_dir_canenter(tp, target_dp, target_name);
if (error) if (error)
goto out_trans_cancel; goto out_trans_cancel;
} else {
error = xfs_iext_count_may_overflow(target_dp,
XFS_DATA_FORK,
XFS_IEXT_DIR_MANIP_CNT(mp));
if (error)
goto out_trans_cancel;
} }
} else { } else {
/* /*
...@@ -3422,18 +3377,12 @@ xfs_rename( ...@@ -3422,18 +3377,12 @@ xfs_rename(
* inode number of the whiteout inode rather than removing it * inode number of the whiteout inode rather than removing it
* altogether. * altogether.
*/ */
if (wip) { if (wip)
error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino, error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
spaceres); spaceres);
} else { else
/*
* NOTE: We don't need to check for extent count overflow here
* because the dir remove name code will leave the dir block in
* place if the extent count would overflow.
*/
error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
spaceres); spaceres);
}
if (error) if (error)
goto out_trans_cancel; goto out_trans_cancel;
......
...@@ -226,11 +226,6 @@ xfs_symlink( ...@@ -226,11 +226,6 @@ xfs_symlink(
goto out_trans_cancel; goto out_trans_cancel;
} }
error = xfs_iext_count_may_overflow(dp, XFS_DATA_FORK,
XFS_IEXT_DIR_MANIP_CNT(mp));
if (error)
goto out_trans_cancel;
/* /*
* Allocate an inode for the symlink. * Allocate an inode for the symlink.
*/ */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册