xfs: make log devices with write back caches work

There's no reason not to support cache flushing on external log devices. The only thing this really requires is flushing the data device first both in fsync and log commits. A side effect is that we also have to remove the barrier write test during mount, which has been superflous since the new FLUSH+FUA code anyway. Also use the chance to flush the RT subvolume write cache before the fsync commit, which is required for correct semantics. Signed-off-by: N Christoph Hellwig <hch@lst.de> Signed-off-by: N Alex Elder <aelder@sgi.com>

xfs: make log devices with write back caches work
There's no reason not to support cache flushing on external log devices. The only thing this really requires is flushing the data device first both in fsync and log commits. A side effect is that we also have to remove the barrier write test during mount, which has been superflous since the new FLUSH+FUA code anyway. Also use the chance to flush the RT subvolume write cache before the fsync commit, which is required for correct semantics. Signed-off-by: N Christoph Hellwig <hch@lst.de> Signed-off-by: N Alex Elder <aelder@sgi.com>
a27a263b · Christoph Hellwig · Alex Elder · c46a131c · a27a263b · a27a263b
隐藏空白更改
内联并排

Showing with 41 addition and 95 deletion

fs/xfs/linux-2.6/xfs_file.c fs/xfs/linux-2.6/xfs_file.c +31 -19

fs/xfs/linux-2.6/xfs_super.c fs/xfs/linux-2.6/xfs_super.c +0 -75

fs/xfs/xfs_log.c fs/xfs/xfs_log.c +10 -1

未找到文件。
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -131,19 +131,34 @@ xfs_file_fsync(
 {
 	struct inode		*inode = file->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
 	int			error = 0;
 	int			log_flushed = 0;
 	trace_xfs_file_fsync(ip);
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+	if (XFS_FORCED_SHUTDOWN(mp))
 		return -XFS_ERROR(EIO);
 	xfs_iflags_clear(ip, XFS_ITRUNCATED);
 	xfs_ioend_wait(ip);
+	if (mp->m_flags & XFS_MOUNT_BARRIER) {
+		/*
+		 * If we have an RT and/or log subvolume we need to make sure
+		 * to flush the write cache the device used for file data
+		 * first.  This is to ensure newly written file data make
+		 * it to disk before logging the new inode size in case of
+		 * an extending write.
+		 */
+		if (XFS_IS_REALTIME_INODE(ip))
+			xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+		else if (mp->m_logdev_targp != mp->m_ddev_targp)
+			xfs_blkdev_issue_flush(mp->m_ddev_targp);
+	}
 	/*
 	 * We always need to make sure that the required inode state is safe on
 	 * disk.  The inode might be clean but we still might need to force the
@@ -175,9 +190,9 @@ xfs_file_fsync(
 		 * updates.  The sync transaction will also force the log.
 		 */
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
-		tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
+		tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
 		error = xfs_trans_reserve(tp, 0,
-				XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
+				XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
 		if (error) {
 			xfs_trans_cancel(tp, 0);
 			return -error;
@@ -209,28 +224,25 @@ xfs_file_fsync(
 		 * force the log.
 		 */
 		if (xfs_ipincount(ip)) {
-			error = _xfs_log_force_lsn(ip->i_mount,
+			error = _xfs_log_force_lsn(mp,
 					ip->i_itemp->ili_last_lsn,
 					XFS_LOG_SYNC, &log_flushed);
 		}
 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 	}
-	if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
+	/*
-		/*
+	 * If we only have a single device, and the log force about was
-		 * If the log write didn't issue an ordered tag we need
+	 * a no-op we might have to flush the data device cache here.
-		 * to flush the disk cache for the data device now.
+	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
-		 */
+	 * an already allocated file and thus do not have any metadata to
-		if (!log_flushed)
+	 * commit.
-			xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
+	 */
+	if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
-		/*
+	    mp->m_logdev_targp == mp->m_ddev_targp &&
-		 * If this inode is on the RT dev we need to flush that
+	    !XFS_IS_REALTIME_INODE(ip) &&
-		 * cache as well.
+	    !log_flushed)
-		 */
+		xfs_blkdev_issue_flush(mp->m_ddev_targp);
-		if (XFS_IS_REALTIME_INODE(ip))
-			xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
-	}
 	return -error;
 }

--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -627,68 +627,6 @@ xfs_blkdev_put(
 		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 }
-/*
- * Try to write out the superblock using barriers.
- */
-STATIC int
-xfs_barrier_test(
-	xfs_mount_t	*mp)
-{
-	xfs_buf_t	*sbp = xfs_getsb(mp, 0);
-	int		error;
-	XFS_BUF_UNDONE(sbp);
-	XFS_BUF_UNREAD(sbp);
-	XFS_BUF_UNDELAYWRITE(sbp);
-	XFS_BUF_WRITE(sbp);
-	XFS_BUF_UNASYNC(sbp);
-	XFS_BUF_ORDERED(sbp);
-	xfsbdstrat(mp, sbp);
-	error = xfs_buf_iowait(sbp);
-	/*
-	 * Clear all the flags we set and possible error state in the
-	 * buffer.  We only did the write to try out whether barriers
-	 * worked and shouldn't leave any traces in the superblock
-	 * buffer.
-	 */
-	XFS_BUF_DONE(sbp);
-	XFS_BUF_ERROR(sbp, 0);
-	XFS_BUF_UNORDERED(sbp);
-	xfs_buf_relse(sbp);
-	return error;
-}
-STATIC void
-xfs_mountfs_check_barriers(xfs_mount_t *mp)
-{
-	int error;
-	if (mp->m_logdev_targp != mp->m_ddev_targp) {
-		xfs_notice(mp,
-		  "Disabling barriers, not supported with external log device");
-		mp->m_flags &= ~XFS_MOUNT_BARRIER;
-		return;
-	}
-	if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
-		xfs_notice(mp,
-			"Disabling barriers, underlying device is readonly");
-		mp->m_flags &= ~XFS_MOUNT_BARRIER;
-		return;
-	}
-	error = xfs_barrier_test(mp);
-	if (error) {
-		xfs_notice(mp,
-			"Disabling barriers, trial barrier write failed");
-		mp->m_flags &= ~XFS_MOUNT_BARRIER;
-		return;
-	}
-}
 void
 xfs_blkdev_issue_flush(
 	xfs_buftarg_t		*buftarg)
@@ -1240,14 +1178,6 @@ xfs_fs_remount(
 		switch (token) {
 		case Opt_barrier:
 			mp->m_flags |= XFS_MOUNT_BARRIER;
-			/*
-			 * Test if barriers are actually working if we can,
-			 * else delay this check until the filesystem is
-			 * marked writeable.
-			 */
-			if (!(mp->m_flags & XFS_MOUNT_RDONLY))
-				xfs_mountfs_check_barriers(mp);
 			break;
 		case Opt_nobarrier:
 			mp->m_flags &= ~XFS_MOUNT_BARRIER;
@@ -1282,8 +1212,6 @@ xfs_fs_remount(
 	/* ro -> rw */
 	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
 		mp->m_flags &= ~XFS_MOUNT_RDONLY;
-		if (mp->m_flags & XFS_MOUNT_BARRIER)
-			xfs_mountfs_check_barriers(mp);
 		/*
 		 * If this is the first remount to writeable state we
@@ -1465,9 +1393,6 @@ xfs_fs_fill_super(
 	if (error)
 		goto out_free_sb;
-	if (mp->m_flags & XFS_MOUNT_BARRIER)
-		xfs_mountfs_check_barriers(mp);
 	error = xfs_filestream_mount(mp);
 	if (error)
 		goto out_free_sb;

--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1372,8 +1372,17 @@ xlog_sync(xlog_t		*log,
 	XFS_BUF_ASYNC(bp);
 	bp->b_flags |= XBF_LOG_BUFFER;
-	if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
+	if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
+		/*
+		 * If we have an external log device, flush the data device
+		 * before flushing the log to make sure all meta data
+		 * written back from the AIL actually made it to disk
+		 * before writing out the new log tail LSN in the log buffer.
+		 */
+		if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
+			xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
 		XFS_BUF_ORDERED(bp);
+	}
 	ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
 	ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);