xfs: skip unallocated regions of inode chunks in xfs_ifree_cluster()

xfs_ifree_cluster() is called to mark all in-memory inodes and inode buffers as stale. This occurs after we've removed the inobt records and dropped any references of inobt data. xfs_ifree_cluster() uses the starting inode number to walk the namespace of inodes expected for a single chunk a cluster buffer at a time. The cluster buffer disk addresses are calculated by decoding the sequential inode numbers expected from the chunk. The problem with this approach is that if the inode chunk being removed is a sparse chunk, not all of the buffer addresses that are calculated as part of this sequence may be inode clusters. Attempting to acquire the buffer based on expected inode characterstics (i.e., cluster length) can lead to errors and is generally incorrect. We already use a couple variables to carry requisite state from xfs_difree() to xfs_ifree_cluster(). Rather than add a third, define a new internal structure to carry the existing parameters through these functions. Add an alloc field that represents the physical allocation bitmap of inodes in the chunk being removed. Modify xfs_ifree_cluster() to check each inode against the bitmap and skip the clusters that were never allocated as real inodes on disk. Signed-off-by: N Brian Foster <bfoster@redhat.com> Reviewed-by: N Dave Chinner <dchinner@redhat.com> Signed-off-by: N Dave Chinner <david@fromorbit.com>

xfs: skip unallocated regions of inode chunks in xfs_ifree_cluster()
xfs_ifree_cluster() is called to mark all in-memory inodes and inode buffers as stale. This occurs after we've removed the inobt records and dropped any references of inobt data. xfs_ifree_cluster() uses the starting inode number to walk the namespace of inodes expected for a single chunk a cluster buffer at a time. The cluster buffer disk addresses are calculated by decoding the sequential inode numbers expected from the chunk. The problem with this approach is that if the inode chunk being removed is a sparse chunk, not all of the buffer addresses that are calculated as part of this sequence may be inode clusters. Attempting to acquire the buffer based on expected inode characterstics (i.e., cluster length) can lead to errors and is generally incorrect. We already use a couple variables to carry requisite state from xfs_difree() to xfs_ifree_cluster(). Rather than add a third, define a new internal structure to carry the existing parameters through these functions. Add an alloc field that represents the physical allocation bitmap of inodes in the chunk being removed. Modify xfs_ifree_cluster() to check each inode against the bitmap and skip the clusters that were never allocated as real inodes on disk. Signed-off-by: N Brian Foster <bfoster@redhat.com> Reviewed-by: N Dave Chinner <dchinner@redhat.com> Signed-off-by: N Dave Chinner <david@fromorbit.com>
09b56604 · Brian Foster · Dave Chinner · 10ae3dc7 · 09b56604 · 09b56604
隐藏空白更改
内联并排

Showing with 35 addition and 20 deletion

fs/xfs/libxfs/xfs_ialloc.c fs/xfs/libxfs/xfs_ialloc.c +7 -10

fs/xfs/libxfs/xfs_ialloc.h fs/xfs/libxfs/xfs_ialloc.h +8 -2

fs/xfs/xfs_inode.c fs/xfs/xfs_inode.c +20 -8

未找到文件。
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1885,8 +1885,7 @@ xfs_difree_inobt(
 	struct xfs_buf			*agbp,
 	xfs_agino_t			agino,
 	struct xfs_bmap_free		*flist,
-	int				*deleted,
+	struct xfs_icluster		*xic,
-	xfs_ino_t			*first_ino,
 	struct xfs_inobt_rec_incore	*orec)
 {
 	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp);
@@ -1947,9 +1946,9 @@ xfs_difree_inobt(
 	if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
 	    rec.ir_free == XFS_INOBT_ALL_FREE &&
 	    mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
+		xic->deleted = 1;
-		*deleted = 1;
+		xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
-		*first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
+		xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
 		/*
 		 * Remove the inode cluster from the AGI B+Tree, adjust the
@@ -1974,7 +1973,7 @@ xfs_difree_inobt(
 		xfs_difree_inode_chunk(mp, agno, &rec, flist);
 	} else {
-		*deleted = 0;
+		xic->deleted = 0;
 		error = xfs_inobt_update(cur, &rec);
 		if (error) {
@@ -2118,8 +2117,7 @@ xfs_difree(
 	struct xfs_trans	*tp,		/* transaction pointer */
 	xfs_ino_t		inode,		/* inode to be freed */
 	struct xfs_bmap_free	*flist,		/* extents to free */
-	int			*deleted,/* set if inode cluster was deleted */
+	struct xfs_icluster	*xic)	/* cluster info if deleted */
-	xfs_ino_t		*first_ino)/* first inode in deleted cluster */
 {
 	/* REFERENCED */
 	xfs_agblock_t		agbno;	/* block number containing inode */
@@ -2170,8 +2168,7 @@ xfs_difree(
 	/*
 	 * Fix up the inode allocation btree.
 	 */
-	error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,
+	error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec);
-				 &rec);
 	if (error)
 		goto error0;

--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -28,6 +28,13 @@ struct xfs_btree_cur;
 /* Move inodes in clusters of this size */
 #define	XFS_INODE_BIG_CLUSTER_SIZE	8192
+struct xfs_icluster {
+	bool		deleted;	/* record is deleted */
+	xfs_ino_t	first_ino;	/* first inode number */
+	uint64_t	alloc;		/* inode phys. allocation bitmap for
+					 * sparse chunks */
+};
 /* Calculate and return the number of filesystem blocks per inode cluster */
 static inline int
 xfs_icluster_size_fsb(
@@ -90,8 +97,7 @@ xfs_difree(
 	struct xfs_trans *tp,		/* transaction pointer */
 	xfs_ino_t	inode,		/* inode to be freed */
 	struct xfs_bmap_free *flist,	/* extents to free */
-	int		*deleted,	/* set if inode cluster was deleted */
+	struct xfs_icluster *ifree);	/* cluster info if deleted */
-	xfs_ino_t	*first_ino);	/* first inode in deleted cluster */
 /*
 * Return the location of the inode in imap, for mapping it into a buffer.

--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2239,9 +2239,9 @@ xfs_iunlink_remove(
 */
 STATIC int
 xfs_ifree_cluster(
-	xfs_inode_t	*free_ip,
+	xfs_inode_t		*free_ip,
-	xfs_trans_t	*tp,
+	xfs_trans_t		*tp,
-	xfs_ino_t	inum)
+	struct xfs_icluster	*xic)
 {
 	xfs_mount_t		*mp = free_ip->i_mount;
 	int			blks_per_cluster;
@@ -2254,13 +2254,26 @@ xfs_ifree_cluster(
 	xfs_inode_log_item_t	*iip;
 	xfs_log_item_t		*lip;
 	struct xfs_perag	*pag;
+	xfs_ino_t		inum;
+	inum = xic->first_ino;
 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
 	blks_per_cluster = xfs_icluster_size_fsb(mp);
 	inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
 	nbufs = mp->m_ialloc_blks / blks_per_cluster;
 	for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {
+		/*
+		 * The allocation bitmap tells us which inodes of the chunk were
+		 * physically allocated. Skip the cluster if an inode falls into
+		 * a sparse region.
+		 */
+		if ((xic->alloc & XFS_INOBT_MASK(inum - xic->first_ino)) == 0) {
+			ASSERT(((inum - xic->first_ino) %
+				inodes_per_cluster) == 0);
+			continue;
+		}
 		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
 					 XFS_INO_TO_AGBNO(mp, inum));
@@ -2418,8 +2431,7 @@ xfs_ifree(
 	xfs_bmap_free_t	*flist)
 {
 	int			error;
-	int			delete;
+	struct xfs_icluster	xic = { 0 };
-	xfs_ino_t		first_ino;
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 	ASSERT(ip->i_d.di_nlink == 0);
@@ -2435,7 +2447,7 @@ xfs_ifree(
 	if (error)
 		return error;
-	error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
+	error = xfs_difree(tp, ip->i_ino, flist, &xic);
 	if (error)
 		return error;
@@ -2452,8 +2464,8 @@ xfs_ifree(
 	ip->i_d.di_gen++;
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	if (delete)
+	if (xic.deleted)
-		error = xfs_ifree_cluster(ip, tp, first_ino);
+		error = xfs_ifree_cluster(ip, tp, &xic);
 	return error;
 }