提交 d1665e41 编写于 作者: S Steven Whitehouse

[GFS2] Put back O_DIRECT support

This patch adds back O_DIRECT support with various caveats
attached:

 1. Journaled data can be read via O_DIRECT since it is now in the
    same on-disk format as normal data files.
 2. Journaled data writes with O_DIRECT will silently fall back
    to normal buffered writes (should we really do this, I wonder,
    or should we return an error instead?)
 3. Stuffed files will fall back to normal buffered I/O
 4. All the usual corner cases (write beyond current end of file,
    write to an unallocated block) will also revert to normal buffered I/O.

The I/O path is slightly odd as reads arrive at the page cache layer
with the lock for the file already held, but writes arrive unlocked.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
上级 fc69d0d3
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/mpage.h> #include <linux/mpage.h>
#include <linux/fs.h>
#include <asm/semaphore.h> #include <asm/semaphore.h>
#include "gfs2.h" #include "gfs2.h"
...@@ -555,30 +556,73 @@ static int gfs2_invalidatepage(struct page *page, unsigned long offset) ...@@ -555,30 +556,73 @@ static int gfs2_invalidatepage(struct page *page, unsigned long offset)
return ret; return ret;
} }
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, static ssize_t gfs2_direct_IO_write(struct kiocb *iocb, const struct iovec *iov,
loff_t offset, unsigned long nr_segs) loff_t offset, unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct gfs2_inode *ip = get_v2ip(inode);
struct gfs2_holder gh;
int rv;
/*
* Shared lock, even though its write, since we do no allocation
* on this path. All we need change is atime.
*/
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
rv = gfs2_glock_nq_m_atime(1, &gh);
if (rv)
goto out;
/*
* Should we return an error here? I can't see that O_DIRECT for
* a journaled file makes any sense. For now we'll silently fall
* back to buffered I/O, likewise we do the same for stuffed
* files since they are (a) small and (b) unaligned.
*/
if (gfs2_is_jdata(ip))
goto out;
if (gfs2_is_stuffed(ip))
goto out;
rv = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
iov, offset, nr_segs, get_blocks_noalloc,
NULL, DIO_OWN_LOCKING);
out:
gfs2_glock_dq_m(1, &gh);
gfs2_holder_uninit(&gh);
return rv;
}
/**
* gfs2_direct_IO
*
* This is called with a shared lock already held for the read path.
* Currently, no locks are held when the write path is called.
*/
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
unsigned long nr_segs)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
struct gfs2_inode *ip = get_v2ip(inode); struct gfs2_inode *ip = get_v2ip(inode);
struct gfs2_sbd *sdp = ip->i_sbd; struct gfs2_sbd *sdp = ip->i_sbd;
get_blocks_t *gb = get_blocks;
atomic_inc(&sdp->sd_ops_address); atomic_inc(&sdp->sd_ops_address);
if (gfs2_is_jdata(ip)) if (rw == WRITE)
return -EINVAL; return gfs2_direct_IO_write(iocb, iov, offset, nr_segs);
if (rw == WRITE) { if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
return -EOPNOTSUPP; /* for now */ gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
} else { return -EINVAL;
if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
return -EINVAL;
}
return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, return __blockdev_direct_IO(READ, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, gb, NULL); offset, nr_segs, get_blocks, NULL,
DIO_OWN_LOCKING);
} }
struct address_space_operations gfs2_file_aops = { struct address_space_operations gfs2_file_aops = {
......
...@@ -176,16 +176,16 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb, ...@@ -176,16 +176,16 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
* If any segment has a negative length, or the cumulative * If any segment has a negative length, or the cumulative
* length ever wraps negative then return -EINVAL. * length ever wraps negative then return -EINVAL.
*/ */
count += iv->iov_len; count += iv->iov_len;
if (unlikely((ssize_t)(count|iv->iov_len) < 0)) if (unlikely((ssize_t)(count|iv->iov_len) < 0))
return -EINVAL; return -EINVAL;
if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
continue; continue;
if (seg == 0) if (seg == 0)
return -EFAULT; return -EFAULT;
nr_segs = seg; nr_segs = seg;
count -= iv->iov_len; /* This segment is no good */ count -= iv->iov_len; /* This segment is no good */
break; break;
} }
/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
...@@ -204,10 +204,14 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb, ...@@ -204,10 +204,14 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
retval = gfs2_glock_nq_m_atime(1, &gh); retval = gfs2_glock_nq_m_atime(1, &gh);
if (retval) if (retval)
goto out; goto out;
if (gfs2_is_stuffed(ip)) {
gfs2_glock_dq_m(1, &gh);
gfs2_holder_uninit(&gh);
goto fallback_to_normal;
}
size = i_size_read(inode); size = i_size_read(inode);
if (pos < size) { if (pos < size) {
retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs); retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
if (retval > 0 && !is_sync_kiocb(iocb)) if (retval > 0 && !is_sync_kiocb(iocb))
retval = -EIOCBQUEUED; retval = -EIOCBQUEUED;
if (retval > 0) if (retval > 0)
...@@ -219,6 +223,7 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb, ...@@ -219,6 +223,7 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
goto out; goto out;
} }
fallback_to_normal:
retval = 0; retval = 0;
if (count) { if (count) {
for (seg = 0; seg < nr_segs; seg++) { for (seg = 0; seg < nr_segs; seg++) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册