Merge tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs fixes from Dave Chinner:

"This is a fix for a regression introduced in 4.5-rc1 by the new torn log write detection code. The regression only affects people moving a clean filesystem between machines/kernels of different architecture (such as changing between 32 bit and 64 bit kernels), but this is the recommended (and only!) safe way to migrate a filesystem between architectures so we really need to ensure it works.

The changes are larger than I'd prefer right at the end of the release cycle, but the majority of the change is just factoring code to enable the detection of a clean log at the correct time to avoid this issue.

Changes:

- Only perform torn log write detection on dirty logs. This prevents failures being detected due to a clean filesystem being moved between machines or kernels of different architectures (e.g. 32 -> 64 bit, BE -> LE, etc). This fixes a regression introduced by the torn log write detection in 4.5-rc1"

* tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  - xfs: only run torn log write detection on dirty logs
  - xfs: refactor in-core log state update to helper
  - xfs: refactor unmount record detection into helper
  - xfs: separate log head record discovery from verification

Merge tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs fixes from Dave Chinner:

"This is a fix for a regression introduced in 4.5-rc1 by the new torn log write detection code. The regression only affects people moving a clean filesystem between machines/kernels of different architecture (such as changing between 32 bit and 64 bit kernels), but this is the recommended (and only!) safe way to migrate a filesystem between architectures so we really need to ensure it works.

The changes are larger than I'd prefer right at the end of the release cycle, but the majority of the change is just factoring code to enable the detection of a clean log at the correct time to avoid this issue.

Changes:

- Only perform torn log write detection on dirty logs. This prevents failures being detected due to a clean filesystem being moved between machines or kernels of different architectures (e.g. 32 -> 64 bit, BE -> LE, etc). This fixes a regression introduced by the torn log write detection in 4.5-rc1"

* tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  - xfs: only run torn log write detection on dirty logs
  - xfs: refactor in-core log state update to helper
  - xfs: refactor unmount record detection into helper
  - xfs: separate log head record discovery from verification
2a62ec0a · Linus Torvalds · 63cf207e · 7f6aff3a · 2a62ec0a
Hide whitespace changes
Inline / Side-by-side

Showing 1 changed file with 168 additions and 103 deletions

fs/xfs/xfs_log_recover.c fs/xfs/xfs_log_recover.c +168 -103

No files found.
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1109,27 +1109,10 @@ xlog_verify_head(
 	bool			tmp_wrapped;
 	/*
-	 * Search backwards through the log looking for the log record header
+	 * Check the head of the log for torn writes. Search backwards from the
-	 * block. This wraps all the way back around to the head so something is
+	 * head until we hit the tail or the maximum number of log record I/Os
-	 * seriously wrong if we can't find it.
+	 * that could have been in flight at one time. Use a temporary buffer so
-	 */
+	 * we don't trash the rhead/bp pointers from the caller.
-	found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
-				      rhead, wrapped);
-	if (found < 0)
-		return found;
-	if (!found) {
-		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
-		return -EIO;
-	}
-	*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
-	/*
-	 * Now that we have a tail block, check the head of the log for torn
-	 * writes. Search again until we hit the tail or the maximum number of
-	 * log record I/Os that could have been in flight at one time. Use a
-	 * temporary buffer so we don't trash the rhead/bp pointer from the
-	 * call above.
 	 */
 	tmp_bp = xlog_get_bp(log, 1);
 	if (!tmp_bp)
@@ -1215,6 +1198,115 @@ xlog_verify_head(
 	return error;
 }
+/*
+ * Check whether the head of the log points to an unmount record. In other
+ * words, determine whether the log is clean. If so, update the in-core state
+ * appropriately.
+ */
+static int
+xlog_check_unmount_rec(
+	struct xlog		*log,
+	xfs_daddr_t		*head_blk,
+	xfs_daddr_t		*tail_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	struct xfs_buf		*bp,
+	bool			*clean)
+{
+	struct xlog_op_header	*op_head;
+	xfs_daddr_t		umount_data_blk;
+	xfs_daddr_t		after_umount_blk;
+	int			hblks;
+	int			error;
+	char			*offset;
+	*clean = false;
+	/*
+	 * Look for unmount record. If we find it, then we know there was a
+	 * clean unmount. Since 'i' could be the last block in the physical
+	 * log, we convert to a log block before comparing to the head_blk.
+	 *
+	 * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
+	 * below. We won't want to clear the unmount record if there is one, so
+	 * we pass the lsn of the unmount record rather than the block after it.
+	 */
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+		int	h_size = be32_to_cpu(rhead->h_size);
+		int	h_version = be32_to_cpu(rhead->h_version);
+		if ((h_version & XLOG_VERSION_2) &&
+		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
+			if (h_size % XLOG_HEADER_CYCLE_SIZE)
+				hblks++;
+		} else {
+			hblks = 1;
+		}
+	} else {
+		hblks = 1;
+	}
+	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
+	if (*head_blk == after_umount_blk &&
+	    be32_to_cpu(rhead->h_num_logops) == 1) {
+		umount_data_blk = rhead_blk + hblks;
+		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
+		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		if (error)
+			return error;
+		op_head = (struct xlog_op_header *)offset;
+		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
+			/*
+			 * Set tail and last sync so that newly written log
+			 * records will point recovery to after the current
+			 * unmount record.
+			 */
+			xlog_assign_atomic_lsn(&log->l_tail_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			*tail_blk = after_umount_blk;
+			*clean = true;
+		}
+	}
+	return 0;
+}
+static void
+xlog_set_state(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	bool			bump_cycle)
+{
+	/*
+	 * Reset log values according to the state of the log when we
+	 * crashed.  In the case where head_blk == 0, we bump curr_cycle
+	 * one because the next write starts a new cycle rather than
+	 * continuing the cycle of the last good log record.  At this
+	 * point we have guaranteed that all partial log records have been
+	 * accounted for.  Therefore, we know that the last good log record
+	 * written was complete and ended exactly on the end boundary
+	 * of the physical log.
+	 */
+	log->l_prev_block = rhead_blk;
+	log->l_curr_block = (int)head_blk;
+	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
+	if (bump_cycle)
+		log->l_curr_cycle++;
+	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+}
 /*
 * Find the sync block number or the tail of the log.
 *
@@ -1238,22 +1330,20 @@ xlog_find_tail(
 	xfs_daddr_t		*tail_blk)
 {
 	xlog_rec_header_t	*rhead;
-	xlog_op_header_t	*op_head;
 	char			*offset = NULL;
 	xfs_buf_t		*bp;
 	int			error;
-	xfs_daddr_t		umount_data_blk;
-	xfs_daddr_t		after_umount_blk;
 	xfs_daddr_t		rhead_blk;
 	xfs_lsn_t		tail_lsn;
-	int			hblks;
 	bool			wrapped = false;
+	bool			clean = false;
 	/*
 	 * Find previous log record
 	 */
 	if ((error = xlog_find_head(log, head_blk)))
 		return error;
+	ASSERT(*head_blk < INT_MAX);
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
@@ -1271,99 +1361,74 @@ xlog_find_tail(
 	}
 	/*
-	 * Trim the head block back to skip over torn records. We can have
+	 * Search backwards through the log looking for the log record header
-	 * multiple log I/Os in flight at any time, so we assume CRC failures
+	 * block. This wraps all the way back around to the head so something is
-	 * back through the previous several records are torn writes and skip
+	 * seriously wrong if we can't find it.
-	 * them.
 	 */
-	ASSERT(*head_blk < INT_MAX);
+	error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
-	error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
+				      &rhead_blk, &rhead, &wrapped);
-				 &rhead, &wrapped);
+	if (error < 0)
-	if (error)
+		return error;
-		goto done;
+	if (!error) {
+		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+		return -EIO;
+	}
+	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
 	/*
-	 * Reset log values according to the state of the log when we
+	 * Set the log state based on the current head record.
-	 * crashed.  In the case where head_blk == 0, we bump curr_cycle
-	 * one because the next write starts a new cycle rather than
-	 * continuing the cycle of the last good log record.  At this
-	 * point we have guaranteed that all partial log records have been
-	 * accounted for.  Therefore, we know that the last good log record
-	 * written was complete and ended exactly on the end boundary
-	 * of the physical log.
 	 */
-	log->l_prev_block = rhead_blk;
+	xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
-	log->l_curr_block = (int)*head_blk;
+	tail_lsn = atomic64_read(&log->l_tail_lsn);
-	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-	if (wrapped)
-		log->l_curr_cycle++;
-	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
-	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
-	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
-	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
 	/*
-	 * Look for unmount record.  If we find it, then we know there
+	 * Look for an unmount record at the head of the log. This sets the log
-	 * was a clean unmount.  Since 'i' could be the last block in
+	 * state to determine whether recovery is necessary.
-	 * the physical log, we convert to a log block before comparing
+	 */
-	 * to the head_blk.
+	error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
+				       rhead_blk, bp, &clean);
+	if (error)
+		goto done;
+	/*
+	 * Verify the log head if the log is not clean (e.g., we have anything
+	 * but an unmount record at the head). This uses CRC verification to
+	 * detect and trim torn writes. If discovered, CRC failures are
+	 * considered torn writes and the log head is trimmed accordingly.
 	 *
-	 * Save the current tail lsn to use to pass to
+	 * Note that we can only run CRC verification when the log is dirty
-	 * xlog_clear_stale_blocks() below.  We won't want to clear the
+	 * because there's no guarantee that the log data behind an unmount
-	 * unmount record if there is one, so we pass the lsn of the
+	 * record is compatible with the current architecture.
-	 * unmount record rather than the block after it.
 	 */
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+	if (!clean) {
-		int	h_size = be32_to_cpu(rhead->h_size);
+		xfs_daddr_t	orig_head = *head_blk;
-		int	h_version = be32_to_cpu(rhead->h_version);
-		if ((h_version & XLOG_VERSION_2) &&
+		error = xlog_verify_head(log, head_blk, tail_blk, bp,
-		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+					 &rhead_blk, &rhead, &wrapped);
-			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-			if (h_size % XLOG_HEADER_CYCLE_SIZE)
-				hblks++;
-		} else {
-			hblks = 1;
-		}
-	} else {
-		hblks = 1;
-	}
-	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
-	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
-	tail_lsn = atomic64_read(&log->l_tail_lsn);
-	if (*head_blk == after_umount_blk &&
-	    be32_to_cpu(rhead->h_num_logops) == 1) {
-		umount_data_blk = rhead_blk + hblks;
-		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
-		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
 		if (error)
 			goto done;
-		op_head = (xlog_op_header_t *)offset;
+		/* update in-core state again if the head changed */
-		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
+		if (*head_blk != orig_head) {
-			/*
+			xlog_set_state(log, *head_blk, rhead, rhead_blk,
-			 * Set tail and last sync so that newly written
+				       wrapped);
-			 * log records will point recovery to after the
+			tail_lsn = atomic64_read(&log->l_tail_lsn);
-			 * current unmount record.
+			error = xlog_check_unmount_rec(log, head_blk, tail_blk,
-			 */
+						       rhead, rhead_blk, bp,
-			xlog_assign_atomic_lsn(&log->l_tail_lsn,
+						       &clean);
-					log->l_curr_cycle, after_umount_blk);
+			if (error)
-			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+				goto done;
-					log->l_curr_cycle, after_umount_blk);
-			*tail_blk = after_umount_blk;
-			/*
-			 * Note that the unmount was clean. If the unmount
-			 * was not clean, we need to know this to rebuild the
-			 * superblock counters from the perag headers if we
-			 * have a filesystem using non-persistent counters.
-			 */
-			log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
 		}
 	}
+	/*
+	 * Note that the unmount was clean. If the unmount was not clean, we
+	 * need to know this to rebuild the superblock counters from the perag
+	 * headers if we have a filesystem using non-persistent counters.
+	 */
+	if (clean)
+		log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
 	/*
 	 * Make sure that there are no blocks in front of the head
 	 * with the same cycle number as the head.  This can happen