提交 2a62ec0a 编写于 作者: L Linus Torvalds

Merge tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs fixes from Dave Chinner:
 "This is a fix for a regression introduced in 4.5-rc1 by the new torn
  log write detection code.  The regression only affects people moving a
  clean filesystem between machines/kernels of different architecture
  (such as changing between 32 bit and 64 bit kernels), but this is the
  recommended (and only!) safe way to migrate a filesystem between
  architectures so we really need to ensure it works.

  The changes are larger than I'd prefer right at the end of the release
  cycle, but the majority of the change is just factoring code to enable
  the detection of a clean log at the correct time to avoid this issue.

  Changes:

   - Only perform torn log write detection on dirty logs.  This prevents
     failures being detected due to a clean filesystem being moved
     between machines or kernels of different architectures (e.g.  32 ->
     64 bit, BE -> LE, etc).  This fixes a regression introduced by the
     torn log write detection in 4.5-rc1"

* tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  xfs: only run torn log write detection on dirty logs
  xfs: refactor in-core log state update to helper
  xfs: refactor unmount record detection into helper
  xfs: separate log head record discovery from verification
...@@ -1109,27 +1109,10 @@ xlog_verify_head( ...@@ -1109,27 +1109,10 @@ xlog_verify_head(
bool tmp_wrapped; bool tmp_wrapped;
/* /*
* Search backwards through the log looking for the log record header * Check the head of the log for torn writes. Search backwards from the
* block. This wraps all the way back around to the head so something is * head until we hit the tail or the maximum number of log record I/Os
* seriously wrong if we can't find it. * that could have been in flight at one time. Use a temporary buffer so
*/ * we don't trash the rhead/bp pointers from the caller.
found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
rhead, wrapped);
if (found < 0)
return found;
if (!found) {
xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
return -EIO;
}
*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
/*
* Now that we have a tail block, check the head of the log for torn
* writes. Search again until we hit the tail or the maximum number of
* log record I/Os that could have been in flight at one time. Use a
* temporary buffer so we don't trash the rhead/bp pointer from the
* call above.
*/ */
tmp_bp = xlog_get_bp(log, 1); tmp_bp = xlog_get_bp(log, 1);
if (!tmp_bp) if (!tmp_bp)
...@@ -1215,6 +1198,115 @@ xlog_verify_head( ...@@ -1215,6 +1198,115 @@ xlog_verify_head(
return error; return error;
} }
/*
 * Determine whether the record at the head of the log is an unmount record,
 * i.e. whether the log is clean, and update the in-core log state if it is.
 *
 * @log:	log being examined
 * @head_blk:	current head block; only read here
 * @tail_blk:	set to the block following the unmount record when one is found
 * @rhead:	header of the last log record before the head
 * @rhead_blk:	block number at which @rhead lives
 * @bp:		buffer used to read the first data block of the record
 * @clean:	set to true if an unmount record was found, false otherwise
 *
 * Returns 0 on success (whether or not the log is clean), or the error
 * reported by xlog_bread() if the record data could not be read.
 */
static int
xlog_check_unmount_rec(
	struct xlog		*log,
	xfs_daddr_t		*head_blk,
	xfs_daddr_t		*tail_blk,
	struct xlog_rec_header	*rhead,
	xfs_daddr_t		rhead_blk,
	struct xfs_buf		*bp,
	bool			*clean)
{
	struct xlog_op_header	*ophdr;
	xfs_daddr_t		data_blk;
	xfs_daddr_t		end_blk;
	int			hdr_blks = 1;
	int			error;
	char			*data;

	/* Assume a dirty log until an unmount record proves otherwise. */
	*clean = false;

	/*
	 * A record header occupies a single block unless this is a v2 log and
	 * the record is larger than XLOG_HEADER_CYCLE_SIZE, in which case the
	 * header takes one block per XLOG_HEADER_CYCLE_SIZE of record size,
	 * rounded up.
	 */
	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
		int	rec_size = be32_to_cpu(rhead->h_size);
		int	rec_version = be32_to_cpu(rhead->h_version);

		if ((rec_version & XLOG_VERSION_2) &&
		    rec_size > XLOG_HEADER_CYCLE_SIZE) {
			hdr_blks = rec_size / XLOG_HEADER_CYCLE_SIZE;
			if (rec_size % XLOG_HEADER_CYCLE_SIZE)
				hdr_blks++;
		}
	}

	/*
	 * Work out the first block past the end of this record. The record
	 * may end at the end of the physical log, so reduce the result modulo
	 * the log size before comparing it against the head.
	 */
	end_blk = rhead_blk + hdr_blks + BTOBB(be32_to_cpu(rhead->h_len));
	end_blk = do_mod(end_blk, log->l_logBBsize);

	/*
	 * Only a record that ends exactly at the head and carries a single
	 * log operation is a candidate for an unmount record.
	 */
	if (*head_blk != end_blk)
		return 0;
	if (be32_to_cpu(rhead->h_num_logops) != 1)
		return 0;

	/* Read the first data block, which follows the header block(s). */
	data_blk = rhead_blk + hdr_blks;
	data_blk = do_mod(data_blk, log->l_logBBsize);
	error = xlog_bread(log, data_blk, 1, bp, &data);
	if (error)
		return error;

	ophdr = (struct xlog_op_header *)data;
	if (!(ophdr->oh_flags & XLOG_UNMOUNT_TRANS))
		return 0;

	/*
	 * Clean unmount: move the tail and last sync LSNs to just past the
	 * unmount record so that newly written log records point recovery to
	 * after it.
	 */
	xlog_assign_atomic_lsn(&log->l_tail_lsn, log->l_curr_cycle, end_blk);
	xlog_assign_atomic_lsn(&log->l_last_sync_lsn, log->l_curr_cycle,
			       end_blk);
	*tail_blk = end_blk;
	*clean = true;

	return 0;
}
/*
 * Reset the in-core log state from the record found at the head of the log.
 *
 * @log:	log whose in-core state is updated
 * @head_blk:	head block; becomes the current block
 * @rhead:	header of the last good log record
 * @rhead_blk:	block number of @rhead; becomes the previous block
 * @bump_cycle:	if true, the current cycle is set to one more than the cycle
 *		recorded in @rhead
 *
 * The cycle is bumped when the next write starts a new cycle rather than
 * continuing the cycle of the last good log record. By this point all partial
 * log records have been accounted for, so in that case the last good record
 * was complete and ended exactly on the end boundary of the physical log.
 *
 * The tail and last sync LSNs are taken from @rhead, and both grant heads are
 * placed at the current cycle and the byte offset of the current block.
 */
static void
xlog_set_state(
	struct xlog		*log,
	xfs_daddr_t		head_blk,
	struct xlog_rec_header	*rhead,
	xfs_daddr_t		rhead_blk,
	bool			bump_cycle)
{
	int			cycle = be32_to_cpu(rhead->h_cycle);
	int			head_bytes;

	if (bump_cycle)
		cycle++;

	log->l_prev_block = rhead_blk;
	log->l_curr_block = (int)head_blk;
	log->l_curr_cycle = cycle;

	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));

	/* Both grant heads start out at the same position: the log head. */
	head_bytes = BBTOB(log->l_curr_block);
	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
			       head_bytes);
	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
			       head_bytes);
}
/* /*
* Find the sync block number or the tail of the log. * Find the sync block number or the tail of the log.
* *
...@@ -1238,22 +1330,20 @@ xlog_find_tail( ...@@ -1238,22 +1330,20 @@ xlog_find_tail(
xfs_daddr_t *tail_blk) xfs_daddr_t *tail_blk)
{ {
xlog_rec_header_t *rhead; xlog_rec_header_t *rhead;
xlog_op_header_t *op_head;
char *offset = NULL; char *offset = NULL;
xfs_buf_t *bp; xfs_buf_t *bp;
int error; int error;
xfs_daddr_t umount_data_blk;
xfs_daddr_t after_umount_blk;
xfs_daddr_t rhead_blk; xfs_daddr_t rhead_blk;
xfs_lsn_t tail_lsn; xfs_lsn_t tail_lsn;
int hblks;
bool wrapped = false; bool wrapped = false;
bool clean = false;
/* /*
* Find previous log record * Find previous log record
*/ */
if ((error = xlog_find_head(log, head_blk))) if ((error = xlog_find_head(log, head_blk)))
return error; return error;
ASSERT(*head_blk < INT_MAX);
bp = xlog_get_bp(log, 1); bp = xlog_get_bp(log, 1);
if (!bp) if (!bp)
...@@ -1271,99 +1361,74 @@ xlog_find_tail( ...@@ -1271,99 +1361,74 @@ xlog_find_tail(
} }
/* /*
* Trim the head block back to skip over torn records. We can have * Search backwards through the log looking for the log record header
* multiple log I/Os in flight at any time, so we assume CRC failures * block. This wraps all the way back around to the head so something is
* back through the previous several records are torn writes and skip * seriously wrong if we can't find it.
* them.
*/ */
ASSERT(*head_blk < INT_MAX); error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk, &rhead_blk, &rhead, &wrapped);
&rhead, &wrapped); if (error < 0)
if (error) return error;
goto done; if (!error) {
xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
return -EIO;
}
*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
/* /*
* Reset log values according to the state of the log when we * Set the log state based on the current head record.
* crashed. In the case where head_blk == 0, we bump curr_cycle
* one because the next write starts a new cycle rather than
* continuing the cycle of the last good log record. At this
* point we have guaranteed that all partial log records have been
* accounted for. Therefore, we know that the last good log record
* written was complete and ended exactly on the end boundary
* of the physical log.
*/ */
log->l_prev_block = rhead_blk; xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
log->l_curr_block = (int)*head_blk; tail_lsn = atomic64_read(&log->l_tail_lsn);
log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
if (wrapped)
log->l_curr_cycle++;
atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
BBTOB(log->l_curr_block));
xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
BBTOB(log->l_curr_block));
/* /*
* Look for unmount record. If we find it, then we know there * Look for an unmount record at the head of the log. This sets the log
* was a clean unmount. Since 'i' could be the last block in * state to determine whether recovery is necessary.
* the physical log, we convert to a log block before comparing */
* to the head_blk. error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
rhead_blk, bp, &clean);
if (error)
goto done;
/*
* Verify the log head if the log is not clean (e.g., we have anything
* but an unmount record at the head). This uses CRC verification to
* detect and trim torn writes. If discovered, CRC failures are
* considered torn writes and the log head is trimmed accordingly.
* *
* Save the current tail lsn to use to pass to * Note that we can only run CRC verification when the log is dirty
* xlog_clear_stale_blocks() below. We won't want to clear the * because there's no guarantee that the log data behind an unmount
* unmount record if there is one, so we pass the lsn of the * record is compatible with the current architecture.
* unmount record rather than the block after it.
*/ */
if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { if (!clean) {
int h_size = be32_to_cpu(rhead->h_size); xfs_daddr_t orig_head = *head_blk;
int h_version = be32_to_cpu(rhead->h_version);
if ((h_version & XLOG_VERSION_2) && error = xlog_verify_head(log, head_blk, tail_blk, bp,
(h_size > XLOG_HEADER_CYCLE_SIZE)) { &rhead_blk, &rhead, &wrapped);
hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
if (h_size % XLOG_HEADER_CYCLE_SIZE)
hblks++;
} else {
hblks = 1;
}
} else {
hblks = 1;
}
after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
tail_lsn = atomic64_read(&log->l_tail_lsn);
if (*head_blk == after_umount_blk &&
be32_to_cpu(rhead->h_num_logops) == 1) {
umount_data_blk = rhead_blk + hblks;
umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
if (error) if (error)
goto done; goto done;
op_head = (xlog_op_header_t *)offset; /* update in-core state again if the head changed */
if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { if (*head_blk != orig_head) {
/* xlog_set_state(log, *head_blk, rhead, rhead_blk,
* Set tail and last sync so that newly written wrapped);
* log records will point recovery to after the tail_lsn = atomic64_read(&log->l_tail_lsn);
* current unmount record. error = xlog_check_unmount_rec(log, head_blk, tail_blk,
*/ rhead, rhead_blk, bp,
xlog_assign_atomic_lsn(&log->l_tail_lsn, &clean);
log->l_curr_cycle, after_umount_blk); if (error)
xlog_assign_atomic_lsn(&log->l_last_sync_lsn, goto done;
log->l_curr_cycle, after_umount_blk);
*tail_blk = after_umount_blk;
/*
* Note that the unmount was clean. If the unmount
* was not clean, we need to know this to rebuild the
* superblock counters from the perag headers if we
* have a filesystem using non-persistent counters.
*/
log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
} }
} }
/*
* Note that the unmount was clean. If the unmount was not clean, we
* need to know this to rebuild the superblock counters from the perag
* headers if we have a filesystem using non-persistent counters.
*/
if (clean)
log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
/* /*
* Make sure that there are no blocks in front of the head * Make sure that there are no blocks in front of the head
* with the same cycle number as the head. This can happen * with the same cycle number as the head. This can happen
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册