diff --git a/fs/inode.c b/fs/inode.c index d9a21d1229261313e37143472b66ae59e8455ae2..26cdb115ce67d5361222d39cf5e94039db654eec 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode) wake_up_bit(&inode->i_state, __I_LOCK); } +/* + * We rarely want to lock two inodes that do not have a parent/child + * relationship (such as directory, child inode) simultaneously. The + * vast majority of file systems should be able to get along fine + * without this. Do not use these functions except as a last resort. + */ +void inode_double_lock(struct inode *inode1, struct inode *inode2) +{ + if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { + if (inode1) + mutex_lock(&inode1->i_mutex); + else if (inode2) + mutex_lock(&inode2->i_mutex); + return; + } + + if (inode1 < inode2) { + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); + } +} +EXPORT_SYMBOL(inode_double_lock); + +void inode_double_unlock(struct inode *inode1, struct inode *inode2) +{ + if (inode1) + mutex_unlock(&inode1->i_mutex); + + if (inode2 && inode2 != inode1) + mutex_unlock(&inode2->i_mutex); +} +EXPORT_SYMBOL(inode_double_unlock); + static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { diff --git a/fs/splice.c b/fs/splice.c index a567010b62ac52e84099f584908629ceae806128..c1072b6940c3cc67402b6b1a251493f8472eb20a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -713,6 +713,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, { int ret, do_wakeup, err; struct splice_desc sd; + struct inode *inode = out->f_mapping->host; ret = 0; do_wakeup = 0; @@ -722,8 +723,13 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, sd.file = out; sd.pos = *ppos; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + /* + * The actor worker might be calling ->prepare_write and + * ->commit_write. Most of the time, these expect i_mutex to + * be held. Since this may result in an ABBA deadlock with + * pipe->inode, we have to order lock acquiry here. + */ + inode_double_lock(inode, pipe->inode); for (;;) { if (pipe->nrbufs) { @@ -797,8 +803,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, pipe_wait(pipe); } - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + inode_double_unlock(inode, pipe->inode); if (do_wakeup) { smp_mb(); @@ -1400,13 +1405,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, * grabbing by inode address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ - if (ipipe->inode < opipe->inode) { - mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD); - } + inode_double_lock(ipipe->inode, opipe->inode); do { if (!opipe->readers) { @@ -1450,8 +1449,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, i++; } while (len); - mutex_unlock(&ipipe->inode->i_mutex); - mutex_unlock(&opipe->inode->i_mutex); + inode_double_unlock(ipipe->inode, opipe->inode); /* * If we put data in the output pipe, wakeup any potential readers. diff --git a/include/linux/fs.h b/include/linux/fs.h index 661c7c572149207a8c3b237f8f0504d446418446..853a02f23936ad9797ae1e7b0672d2c8e1c34780 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class I_MUTEX_QUOTA }; +extern void inode_double_lock(struct inode *inode1, struct inode *inode2); +extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); + /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic