Commit 38851cc1 authored by Miao Xie, committed by Josef Bacik

Btrfs: implement unlocked dio write

This idea comes from ext4. With this patch we can make dio writes run in
parallel and improve performance. But because we cannot update isize
without holding i_mutex, the unlocked dio write can only be done in front
of the EOF, i.e. when the write does not extend the file.

We needn't worry about the race between dio write and truncate, because
truncate has to wait until all in-flight dio writes complete.
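
To make the ordering concrete, here is a minimal sketch of the interplay
described above. It is not code from this patch: dio_write_side() and
truncate_side() are made-up names, while i_dio_count, inode_dio_done() and
inode_dio_wait() are the real kernel primitives the patch relies on.

#include <linux/fs.h>

/* Illustrative only: condenses the dio-write vs. truncate ordering. */
static void dio_write_side(struct inode *inode, loff_t offset, size_t count)
{
	bool relock = false;

	/* called with i_mutex held */
	atomic_inc(&inode->i_dio_count);	/* pin the inode for direct I/O */
	smp_mb__after_atomic_inc();

	/* only a write that cannot move isize may drop i_mutex */
	if (offset + count <= inode->i_size) {
		mutex_unlock(&inode->i_mutex);
		relock = true;
	}

	/* ... submit the direct write, possibly in parallel with others ... */

	inode_dio_done(inode);			/* drop i_dio_count, wake waiters */
	if (relock)
		mutex_lock(&inode->i_mutex);	/* caller expects i_mutex held */
}

/* Illustrative only: how a truncate excludes in-flight dio writes. */
static void truncate_side(struct inode *inode)
{
	mutex_lock(&inode->i_mutex);
	inode_dio_wait(inode);		/* sleeps until i_dio_count drops to 0 */
	/* ... safe to shrink i_size and drop extents here ... */
	mutex_unlock(&inode->i_mutex);
}

Because the unlocked path is only taken when the write cannot extend the
file, the truncate sees a stable isize once inode_dio_wait() returns.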

And we needn't worry about the race between dio write and punch hole
either, because the extent lock protects our operation.
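
Similarly, the punch-hole claim rests on btrfs's per-inode extent range
lock: both the dio write path and hole punching bracket the byte range they
touch with lock_extent()/unlock_extent() on BTRFS_I(inode)->io_tree, so
operations on overlapping ranges serialize even without i_mutex. A minimal
sketch of that bracketing, with range_op() as an illustrative placeholder:

#include "btrfs_inode.h"
#include "extent_io.h"

/* Illustrative only: the range-locking pattern both sides follow. */
static void range_op(struct inode *inode, u64 lockstart, u64 lockend)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;

	lock_extent(io_tree, lockstart, lockend);	/* blocks overlapping users */
	/* ... look up, drop or map extents in [lockstart, lockend] ... */
	unlock_extent(io_tree, lockstart, lockend);
}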

I ran fio to test the performance of this feature.

== Hardware ==
CPU: Intel(R) Core(TM)2 Duo CPU     E7500  @ 2.93GHz
Mem: 2GB
SSD: Intel X25-M 120GB (Test Partition: 60GB)

== config file ==
[global]
ioengine=psync
direct=1
bs=4k
size=32G
runtime=60
directory=/mnt/btrfs/
filename=testfile
group_reporting
thread

[file1]
numjobs=1 # 2 4
rw=randwrite

== result: write bandwidth (KB/s) ==
numjobs	1	2	4
lock	24936	24738	24726
nolock	24962	30866	32101

== result: write IOPS ==
numjobs	1	2	4
lock	6234	6184	6181
nolock	6240	7716	8025
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Parent 2e60a51e
@@ -6677,28 +6677,36 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 	struct inode *inode = file->f_mapping->host;
 	size_t count = 0;
 	int flags = 0;
-	bool wakeup = false;
+	bool wakeup = true;
+	bool relock = false;
 	ssize_t ret;
 
 	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
 			    offset, nr_segs))
 		return 0;
 
+	atomic_inc(&inode->i_dio_count);
+	smp_mb__after_atomic_inc();
+
 	if (rw & WRITE) {
 		count = iov_length(iov, nr_segs);
+		/*
+		 * If the write DIO is beyond the EOF, we need update
+		 * the isize, but it is protected by i_mutex. So we can
+		 * not unlock the i_mutex at this case.
+		 */
+		if (offset + count <= inode->i_size) {
+			mutex_unlock(&inode->i_mutex);
+			relock = true;
+		}
 		ret = btrfs_delalloc_reserve_space(inode, count);
 		if (ret)
-			return ret;
-	} else {
-		atomic_inc(&inode->i_dio_count);
-		smp_mb__after_atomic_inc();
-		if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
-				      &BTRFS_I(inode)->runtime_flags))) {
-			inode_dio_done(inode);
-			flags = DIO_LOCKING | DIO_SKIP_HOLES;
-		} else {
-			wakeup = true;
-		}
+			goto out;
+	} else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
+				     &BTRFS_I(inode)->runtime_flags))) {
+		inode_dio_done(inode);
+		flags = DIO_LOCKING | DIO_SKIP_HOLES;
+		wakeup = false;
 	}
 
 	ret = __blockdev_direct_IO(rw, iocb, inode,
@@ -6717,8 +6725,11 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 		}
 		btrfs_delalloc_release_metadata(inode, 0);
 	}
+out:
 	if (wakeup)
 		inode_dio_done(inode);
+	if (relock)
+		mutex_lock(&inode->i_mutex);
 
 	return ret;
 }