diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 1f7ae144f6d89bc351c8aa34076262bf366f4a4d..5393e6611691617db6c933a945df25baa0bd1090 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -87,3 +87,10 @@ dir_resv_level= (*) By default, directory reservations will scale with file reservations - users should rarely need to change this value. If allocation reservations are turned off, this option will have no effect. +coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode + lock will be taken to force other nodes drop cache, + therefore full cluster coherency is guaranteed even + for O_DIRECT writes. +coherency=buffered Allow concurrent O_DIRECT writes without EX lock among + nodes, which gains high performance at risk of getting + stale data on other nodes. diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 13af9937bdda15ede9c9e9a42c8eaa3e59964806..9e8cc4346b761e6246494416787b5b78c193f01e 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2225,6 +2225,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file->f_path.dentry->d_inode; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int full_coherency = !(osb->s_mount_opt & + OCFS2_MOUNT_COHERENCY_BUFFERED); mlog_entry("(0x%p, %u, '%.*s')\n", file, (unsigned int)nr_segs, @@ -2248,14 +2250,37 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, have_alloc_sem = 1; } - /* concurrent O_DIRECT writes are allowed */ - rw_level = !direct_io; + /* + * Concurrent O_DIRECT writes are allowed with + * mount_option "coherency=buffered". + */ + rw_level = (!direct_io || full_coherency); + ret = ocfs2_rw_lock(inode, rw_level); if (ret < 0) { mlog_errno(ret); goto out_sems; } + /* + * O_DIRECT writes with "coherency=full" need to take EX cluster + * inode_lock to guarantee coherency. + */ + if (direct_io && full_coherency) { + /* + * We need to take and drop the inode lock to force + * other nodes to drop their caches. Buffered I/O + * already does this in write_begin(). + */ + ret = ocfs2_inode_lock(inode, NULL, 1); + if (ret < 0) { + mlog_errno(ret); + goto out_sems; + } + + ocfs2_inode_unlock(inode, 1); + } + can_do_direct = direct_io; ret = ocfs2_prepare_inode_for_write(file, ppos, iocb->ki_left, appending, diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 687e291d73f29ea2a56ea9e874955fa9b68bca94..3064feef143039f216d3b77235c2925fe6d81109 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -263,6 +263,9 @@ enum ocfs2_mount_options control lists */ OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ + + OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12 /* Allow concurrent O_DIRECT + writes */ }; #define OCFS2_OSB_SOFT_RO 0x0001 diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index b578644b6637823819aae19cd8d142ad20d52fa1..9122d59f8127c6768d14ffe7c1ab48fa2a9e056a 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -177,6 +177,8 @@ enum { Opt_noacl, Opt_usrquota, Opt_grpquota, + Opt_coherency_buffered, + Opt_coherency_full, Opt_resv_level, Opt_dir_resv_level, Opt_err, @@ -205,6 +207,8 @@ static const match_table_t tokens = { {Opt_noacl, "noacl"}, {Opt_usrquota, "usrquota"}, {Opt_grpquota, "grpquota"}, + {Opt_coherency_buffered, "coherency=buffered"}, + {Opt_coherency_full, "coherency=full"}, {Opt_resv_level, "resv_level=%u"}, {Opt_dir_resv_level, "dir_resv_level=%u"}, {Opt_err, NULL} @@ -1452,6 +1456,12 @@ static int ocfs2_parse_options(struct super_block *sb, case Opt_grpquota: mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; break; + case Opt_coherency_buffered: + mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED; + break; + case Opt_coherency_full: + mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED; + break; case Opt_acl: mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; @@ -1550,6 +1560,11 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (opts & OCFS2_MOUNT_GRPQUOTA) seq_printf(s, ",grpquota"); + if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED) + seq_printf(s, ",coherency=buffered"); + else + seq_printf(s, ",coherency=full"); + if (opts & OCFS2_MOUNT_NOUSERXATTR) seq_printf(s, ",nouser_xattr"); else