提交 62752ee1 编写于 作者: M Mark Fasheh 提交者: Jens Axboe

[PATCH] Take i_mutex in splice_from_pipe()

The splice_actor may be calling ->prepare_write() and ->commit_write(). We
want to hold i_mutex on the inode being written to before calling those, so
that we don't race with i_size changes.

The double locking behavior is done elsewhere in splice.c, and if we
eventually want _nolock variants of generic_file_splice_write(), fs modules
might have to replicate the nasty locking code. We introduce
inode_double_lock() and inode_double_unlock() to consolidate the locking
rules into one set of functions.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
上级 ce9e3d99
...@@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode) ...@@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode)
wake_up_bit(&inode->i_state, __I_LOCK); wake_up_bit(&inode->i_state, __I_LOCK);
} }
/*
 * inode_double_lock - take i_mutex on up to two inodes in a stable order
 * @inode1: first inode, may be NULL
 * @inode2: second inode, may be NULL
 *
 * Locking two inodes that are not in a parent/child relationship (such
 * as a directory and one of its entries) is rarely needed, and the vast
 * majority of file systems should get along fine without it. Treat this
 * helper and inode_double_unlock() as a last resort.
 *
 * If either pointer is NULL, or both refer to the same inode, at most a
 * single plain mutex_lock() is taken. Otherwise both mutexes are taken,
 * lower address first, so that every caller agrees on the order.
 */
void inode_double_lock(struct inode *inode1, struct inode *inode2)
{
	struct inode *first;
	struct inode *second;

	/* Degenerate case: there is at most one distinct inode to lock. */
	if (!inode1 || !inode2 || inode1 == inode2) {
		struct inode *only = inode1 ? inode1 : inode2;

		if (only)
			mutex_lock(&only->i_mutex);
		return;
	}

	/* Two distinct inodes: order them by address. */
	if (inode1 < inode2) {
		first = inode1;
		second = inode2;
	} else {
		first = inode2;
		second = inode1;
	}

	mutex_lock_nested(&first->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&second->i_mutex, I_MUTEX_CHILD);
}
EXPORT_SYMBOL(inode_double_lock);
void inode_double_unlock(struct inode *inode1, struct inode *inode2)
{
if (inode1)
mutex_unlock(&inode1->i_mutex);
if (inode2 && inode2 != inode1)
mutex_unlock(&inode2->i_mutex);
}
EXPORT_SYMBOL(inode_double_unlock);
static __initdata unsigned long ihash_entries; static __initdata unsigned long ihash_entries;
static int __init set_ihash_entries(char *str) static int __init set_ihash_entries(char *str)
{ {
......
...@@ -713,6 +713,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, ...@@ -713,6 +713,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
{ {
int ret, do_wakeup, err; int ret, do_wakeup, err;
struct splice_desc sd; struct splice_desc sd;
struct inode *inode = out->f_mapping->host;
ret = 0; ret = 0;
do_wakeup = 0; do_wakeup = 0;
...@@ -722,8 +723,13 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, ...@@ -722,8 +723,13 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
sd.file = out; sd.file = out;
sd.pos = *ppos; sd.pos = *ppos;
if (pipe->inode) /*
mutex_lock(&pipe->inode->i_mutex); * The actor worker might be calling ->prepare_write and
* ->commit_write. Most of the time, these expect i_mutex to
* be held. Since this may result in an ABBA deadlock with
* pipe->inode, we have to order lock acquisition here.
*/
inode_double_lock(inode, pipe->inode);
for (;;) { for (;;) {
if (pipe->nrbufs) { if (pipe->nrbufs) {
...@@ -797,8 +803,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, ...@@ -797,8 +803,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
pipe_wait(pipe); pipe_wait(pipe);
} }
if (pipe->inode) inode_double_unlock(inode, pipe->inode);
mutex_unlock(&pipe->inode->i_mutex);
if (do_wakeup) { if (do_wakeup) {
smp_mb(); smp_mb();
...@@ -1400,13 +1405,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, ...@@ -1400,13 +1405,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* grabbing by inode address. Otherwise two different processes * grabbing by inode address. Otherwise two different processes
* could deadlock (one doing tee from A -> B, the other from B -> A). * could deadlock (one doing tee from A -> B, the other from B -> A).
*/ */
if (ipipe->inode < opipe->inode) { inode_double_lock(ipipe->inode, opipe->inode);
mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
} else {
mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
}
do { do {
if (!opipe->readers) { if (!opipe->readers) {
...@@ -1450,8 +1449,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, ...@@ -1450,8 +1449,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
i++; i++;
} while (len); } while (len);
mutex_unlock(&ipipe->inode->i_mutex); inode_double_unlock(ipipe->inode, opipe->inode);
mutex_unlock(&opipe->inode->i_mutex);
/* /*
* If we put data in the output pipe, wakeup any potential readers. * If we put data in the output pipe, wakeup any potential readers.
......
...@@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class ...@@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class
I_MUTEX_QUOTA I_MUTEX_QUOTA
}; };
/*
 * Lock / unlock the i_mutex of up to two inodes. Either argument may be
 * NULL, and both may refer to the same inode; in those cases at most one
 * mutex is taken or released. Two distinct inodes are locked in address
 * order.
 */
extern void inode_double_lock(struct inode *inode1, struct inode *inode2);
extern void inode_double_unlock(struct inode *inode1, struct inode *inode2);
/* /*
* NOTE: in a 32bit arch with a preemptable kernel and * NOTE: in a 32bit arch with a preemptable kernel and
* an UP compile the i_size_read/write must be atomic * an UP compile the i_size_read/write must be atomic
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册