xfs: fix mounting failed caused by sequencing problem in the log records

Offering: HULK
hulk inclusion
category: bugfix
bugzilla: 188870, https://gitee.com/openeuler/kernel/issues/I76JSK

--------------------------------

During the test of growfs + power-off, we encountered a mount failure. The specific call stack is as follows:

[584505.210179] XFS (loop0): xfs_buf_find: daddr 0x6d6002 out of range, EOFS 0x6d6000
...
[584505.210739] Call Trace:
[584505.210776] xfs_buf_get_map+0x44/0x230 [xfs]
[584505.210780] ? trace_event_buffer_commit+0x57/0x140
[584505.210818] xfs_buf_read_map+0x54/0x280 [xfs]
[584505.210858] ? xlog_recover_items_pass2+0x53/0xb0 [xfs]
[584505.210899] xlog_recover_buf_commit_pass2+0x112/0x440 [xfs]
[584505.210939] ? xlog_recover_items_pass2+0x53/0xb0 [xfs]
[584505.210980] xlog_recover_items_pass2+0x53/0xb0 [xfs]
[584505.211020] xlog_recover_commit_trans+0x2ca/0x320 [xfs]
[584505.211061] xlog_recovery_process_trans+0xc6/0xf0 [xfs]
[584505.211101] xlog_recover_process_data+0x9e/0x110 [xfs]
[584505.211141] xlog_do_recovery_pass+0x3b4/0x5c0 [xfs]
[584505.211181] xlog_do_log_recovery+0x5e/0x80 [xfs]
[584505.211223] xlog_do_recover+0x33/0x1a0 [xfs]
[584505.211262] xlog_recover+0xd7/0x170 [xfs]
[584505.211303] xfs_log_mount+0x217/0x2b0 [xfs]
[584505.211341] xfs_mountfs+0x3da/0x870 [xfs]
[584505.211384] xfs_fc_fill_super+0x3fa/0x7a0 [xfs]
[584505.211428] ? xfs_setup_devices+0x80/0x80 [xfs]
[584505.211432] get_tree_bdev+0x16f/0x260
[584505.211434] vfs_get_tree+0x25/0xc0
[584505.211436] do_new_mount+0x156/0x1b0
[584505.211438] __se_sys_mount+0x165/0x1d0
[584505.211440] do_syscall_64+0x33/0x40
[584505.211442] entry_SYSCALL_64_after_hwframe+0x61/0xc6

After analyzing the log records, we have discovered the following content:

============================================================================
cycle: 173 version: 2 lsn: 173,2742 tail_lsn: 173,1243
length of Log Record: 25600 prev offset: 2702 num ops: 258
uuid: fb958458-48a3-4c76-ae23-7a1cf3053065 format: little endian linux
h_size: 32768
----------------------------------------------------------------------------
...
----------------------------------------------------------------------------
Oper (100): tid: 1c010724 len: 24 clientid: TRANS flags: none
BUF: #regs: 2 start blkno: 7168002 (0x6d6002) len: 1 bmap size: 1 flags: 0x3800
Oper (101): tid: 1c010724 len: 128 clientid: TRANS flags: none
AGI Buffer: XAGI
ver: 1 seq#: 28 len: 2048 cnt: 0 root: 3 level: 1
free#: 0x0 newino: 0x140
bucket[0 - 3]: 0xffffffff 0xffffffff 0xffffffff 0xffffffff
bucket[4 - 7]: 0xffffffff 0xffffffff 0xffffffff 0xffffffff
bucket[8 - 11]: 0xffffffff 0xffffffff 0xffffffff 0xffffffff
bucket[12 - 15]: 0xffffffff 0xffffffff 0xffffffff 0xffffffff
bucket[16 - 19]: 0xffffffff
----------------------------------------------------------------------------
...
----------------------------------------------------------------------------
Oper (108): tid: 1c010724 len: 24 clientid: TRANS flags: none
BUF: #regs: 2 start blkno: 0 (0x0) len: 1 bmap size: 1 flags: 0x9000
Oper (109): tid: 1c010724 len: 384 clientid: TRANS flags: none
SUPER BLOCK Buffer:
icount: 6360863066640355328 ifree: 898048 fdblks: 0 frext: 0
----------------------------------------------------------------------------
...

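We found that, in the log records, the buffer item that modifies a block in the expanded area (Oper 100, start blkno 0x6d6002) is recorded before the superblock buffer item written by growfs (Oper 108, start blkno 0), even though both carry the same tid (1c010724). During log replay the expanded block is therefore looked up while the filesystem still has its old size (EOFS 0x6d6000), so the range check fails and the mount is aborted. We need to ensure that, when replaying the log, items related to the superblock are replayed first.

Signed-off-by: N Wu Guanghao <wuguanghao3@huawei.com>
Signed-off-by: N yangerkun <yangerkun@huawei.com>
Signed-off-by: N Long Li <leo.lilong@huawei.com>
(cherry picked from commit dba19fb8)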

xfs: fix mounting failed caused by sequencing problem in the log records
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: 188870, https://gitee.com/openeuler/kernel/issues/I76JSK

--------------------------------

During the test of growfs + power-off, we encountered a mount failure. The specific call stack is as follows:

[584505.210179] XFS (loop0): xfs_buf_find: daddr 0x6d6002 out of range, EOFS 0x6d6000
...
[584505.210739] Call Trace:
[584505.210776] xfs_buf_get_map+0x44/0x230 [xfs]
[584505.210780] ? trace_event_buffer_commit+0x57/0x140
[584505.210818] xfs_buf_read_map+0x54/0x280 [xfs]
[584505.210858] ? xlog_recover_items_pass2+0x53/0xb0 [xfs]
[584505.210899] xlog_recover_buf_commit_pass2+0x112/0x440 [xfs]
[584505.210939] ? xlog_recover_items_pass2+0x53/0xb0 [xfs]
[584505.210980] xlog_recover_items_pass2+0x53/0xb0 [xfs]
[584505.211020] xlog_recover_commit_trans+0x2ca/0x320 [xfs]
[584505.211061] xlog_recovery_process_trans+0xc6/0xf0 [xfs]
[584505.211101] xlog_recover_process_data+0x9e/0x110 [xfs]
[584505.211141] xlog_do_recovery_pass+0x3b4/0x5c0 [xfs]
[584505.211181] xlog_do_log_recovery+0x5e/0x80 [xfs]
[584505.211223] xlog_do_recover+0x33/0x1a0 [xfs]
[584505.211262] xlog_recover+0xd7/0x170 [xfs]
[584505.211303] xfs_log_mount+0x217/0x2b0 [xfs]
[584505.211341] xfs_mountfs+0x3da/0x870 [xfs]
[584505.211384] xfs_fc_fill_super+0x3fa/0x7a0 [xfs]
[584505.211428] ? xfs_setup_devices+0x80/0x80 [xfs]
[584505.211432] get_tree_bdev+0x16f/0x260
[584505.211434] vfs_get_tree+0x25/0xc0
[584505.211436] do_new_mount+0x156/0x1b0
[584505.211438] __se_sys_mount+0x165/0x1d0
[584505.211440] do_syscall_64+0x33/0x40
[584505.211442] entry_SYSCALL_64_after_hwframe+0x61/0xc6

After analyzing the log records, we have discovered the following content:

============================================================================
cycle: 173 version: 2 lsn: 173,2742 tail_lsn: 173,1243
length of Log Record: 25600 prev offset: 2702 num ops: 258
uuid: fb958458-48a3-4c76-ae23-7a1cf3053065 format: little endian linux
h_size: 32768
----------------------------------------------------------------------------
...
----------------------------------------------------------------------------
Oper (100): tid: 1c010724 len: 24 clientid: TRANS flags: none
BUF: #regs: 2 start blkno: 7168002 (0x6d6002) len: 1 bmap size: 1 flags: 0x3800
Oper (101): tid: 1c010724 len: 128 clientid: TRANS flags: none
AGI Buffer: XAGI
ver: 1 seq#: 28 len: 2048 cnt: 0 root: 3 level: 1
free#: 0x0 newino: 0x140
bucket[0 - 3]: 0xffffffff 0xffffffff 0xffffffff 0xffffffff
bucket[4 - 7]: 0xffffffff 0xffffffff 0xffffffff 0xffffffff
bucket[8 - 11]: 0xffffffff 0xffffffff 0xffffffff 0xffffffff
bucket[12 - 15]: 0xffffffff 0xffffffff 0xffffffff 0xffffffff
bucket[16 - 19]: 0xffffffff
----------------------------------------------------------------------------
...
----------------------------------------------------------------------------
Oper (108): tid: 1c010724 len: 24 clientid: TRANS flags: none
BUF: #regs: 2 start blkno: 0 (0x0) len: 1 bmap size: 1 flags: 0x9000
Oper (109): tid: 1c010724 len: 384 clientid: TRANS flags: none
SUPER BLOCK Buffer:
icount: 6360863066640355328 ifree: 898048 fdblks: 0 frext: 0
----------------------------------------------------------------------------
...

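We found that, in the log records, the buffer item that modifies a block in the expanded area (Oper 100, start blkno 0x6d6002) is recorded before the superblock buffer item written by growfs (Oper 108, start blkno 0), even though both carry the same tid (1c010724). During log replay the expanded block is therefore looked up while the filesystem still has its old size (EOFS 0x6d6000), so the range check fails and the mount is aborted. We need to ensure that, when replaying the log, items related to the superblock are replayed first.

Signed-off-by: N Wu Guanghao <wuguanghao3@huawei.com>
Signed-off-by: N yangerkun <yangerkun@huawei.com>
Signed-off-by: N Long Li <leo.lilong@huawei.com>
(cherry picked from commit dba19fb8)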
c1a7d259 · yangerkun · openeuler-sync-bot · 6e31072c · c1a7d259 · c1a7d259
Showing 3 changed files with 26 additions and 3 deletions

fs/xfs/libxfs/xfs_log_recover.h fs/xfs/libxfs/xfs_log_recover.h +1 -0

fs/xfs/xfs_buf_item_recover.c fs/xfs/xfs_buf_item_recover.c +2 -0

fs/xfs/xfs_log_recover.c fs/xfs/xfs_log_recover.c +23 -3

未找到文件。
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -18,6 +18,7 @@ enum xlog_recover_reorder {
 	XLOG_REORDER_ITEM_LIST,
 	XLOG_REORDER_INODE_BUFFER_LIST,
 	XLOG_REORDER_CANCEL_LIST,
+	XLOG_REORDER_SB_BUFFER_LIST,
 };

 struct xlog_recover_item_ops {

--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -162,6 +162,8 @@ xlog_recover_buf_reorder(
 		return XLOG_REORDER_CANCEL_LIST;
 	if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
 		return XLOG_REORDER_INODE_BUFFER_LIST;
+	if (buf_f->blf_blkno == XFS_SB_DADDR)
+		return XLOG_REORDER_SB_BUFFER_LIST;
 	return XLOG_REORDER_BUFFER_LIST;
 }


--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1879,6 +1879,9 @@ xlog_recover_reorder_trans(
 		case XLOG_REORDER_BUFFER_LIST:
 			list_move_tail(&item->ri_list, &buffer_list);
 			break;
+		case XLOG_REORDER_SB_BUFFER_LIST:
+			list_move(&item->ri_list, &buffer_list);
+			break;
 		case XLOG_REORDER_CANCEL_LIST:
 			trace_xfs_log_recover_item_reorder_head(log,
 					trans, item, pass);
@@ -1942,6 +1945,25 @@ xlog_recover_items_pass2(
 	return error;
 }

+#define XLOG_RECOVER_COMMIT_QUEUE_MAX 100	/* queued-item count at which pass2 is forced */
+static inline bool	/* true when the caller should run pass2 on the queued items now */
+xlog_recover_should_pass2(
+	struct xlog_recover_item	*item,	/* item the caller has just queued */
+	int				items_queued)	/* number of items queued so far */
+{
+	struct xfs_buf_log_format	*buf_f;
+
+	if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX)	/* queue has reached its limit */
+		return true;
+	if (ITEM_TYPE(item) == XFS_LI_BUF) {
+		buf_f = item->ri_buf[0].i_addr;	/* region 0 is read as the buffer log format */
+		if (buf_f->blf_blkno == XFS_SB_DADDR)	/* buffer at the superblock address: replay at once */
+			return true;
+	}
+
+	return false;
+}
+
 /*
 * Perform the transaction.
 *
@@ -1962,8 +1984,6 @@ xlog_recover_commit_trans(
 	LIST_HEAD			(ra_list);
 	LIST_HEAD			(done_list);

-	#define XLOG_RECOVER_COMMIT_QUEUE_MAX 100
-
 	hlist_del_init(&trans->r_list);

 	error = xlog_recover_reorder_trans(log, trans, pass);
@@ -1983,7 +2003,7 @@ xlog_recover_commit_trans(
 				item->ri_ops->ra_pass2(log, item);
 			list_move_tail(&item->ri_list, &ra_list);
 			items_queued++;
-			if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {
+			if (xlog_recover_should_pass2(item, items_queued)) {
 				error = xlog_recover_items_pass2(log, trans,
 						buffer_list, &ra_list);
 				list_splice_tail_init(&ra_list, &done_list);