提交 b21996e3 编写于 作者: J. Bruce Fields 提交者: Al Viro

locks: break delegations on unlink

We need to break delegations on any operation that changes the set of
links pointing to an inode.  Start with unlink.

Such operations also hold the i_mutex on a parent directory.  Breaking a
delegation may require waiting for a timeout (by default 90 seconds) in
the case of an unresponsive NFS client.  To avoid blocking all directory
operations, we therefore drop locks before waiting for the delegation.
The logic then looks like:

	acquire locks
	...
	test for delegation; if found:
		take reference on inode
		release locks
		wait for delegation break
		drop reference on inode
		retry

It is possible this could never terminate.  (Even if we take precautions
to prevent another delegation being acquired on the same inode, we could
get a different inode on each retry.)  But this seems very unlikely.

The initial test for a delegation happens after the lock on the target
inode is acquired, but the directory inode may have been acquired
further up the call stack.  We therefore add a "struct inode **"
argument to any intervening functions, which we use to pass the inode
back up to the caller in the case it needs a delegation synchronously
broken.

Cc: David Howells <dhowells@redhat.com>
Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Dustin Kirkland <dustin.kirkland@gazzang.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
上级 9accbb97
...@@ -324,7 +324,7 @@ static int handle_remove(const char *nodename, struct device *dev) ...@@ -324,7 +324,7 @@ static int handle_remove(const char *nodename, struct device *dev)
mutex_lock(&dentry->d_inode->i_mutex); mutex_lock(&dentry->d_inode->i_mutex);
notify_change(dentry, &newattrs); notify_change(dentry, &newattrs);
mutex_unlock(&dentry->d_inode->i_mutex); mutex_unlock(&dentry->d_inode->i_mutex);
err = vfs_unlink(parent.dentry->d_inode, dentry); err = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
if (!err || err == -ENOENT) if (!err || err == -ENOENT)
deleted = 1; deleted = 1;
} }
......
...@@ -294,7 +294,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, ...@@ -294,7 +294,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
if (ret < 0) { if (ret < 0) {
cachefiles_io_error(cache, "Unlink security error"); cachefiles_io_error(cache, "Unlink security error");
} else { } else {
ret = vfs_unlink(dir->d_inode, rep); ret = vfs_unlink(dir->d_inode, rep, NULL);
if (preemptive) if (preemptive)
cachefiles_mark_object_buried(cache, rep); cachefiles_mark_object_buried(cache, rep);
......
...@@ -153,7 +153,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, ...@@ -153,7 +153,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
dget(lower_dentry); dget(lower_dentry);
lower_dir_dentry = lock_parent(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry);
rc = vfs_unlink(lower_dir_inode, lower_dentry); rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
if (rc) { if (rc) {
printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
goto out_unlock; goto out_unlock;
...@@ -208,7 +208,7 @@ ecryptfs_do_create(struct inode *directory_inode, ...@@ -208,7 +208,7 @@ ecryptfs_do_create(struct inode *directory_inode,
inode = __ecryptfs_get_inode(lower_dentry->d_inode, inode = __ecryptfs_get_inode(lower_dentry->d_inode,
directory_inode->i_sb); directory_inode->i_sb);
if (IS_ERR(inode)) { if (IS_ERR(inode)) {
vfs_unlink(lower_dir_dentry->d_inode, lower_dentry); vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL);
goto out_lock; goto out_lock;
} }
fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode); fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
......
...@@ -3615,7 +3615,25 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) ...@@ -3615,7 +3615,25 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
return do_rmdir(AT_FDCWD, pathname); return do_rmdir(AT_FDCWD, pathname);
} }
int vfs_unlink(struct inode *dir, struct dentry *dentry) /**
* vfs_unlink - unlink a filesystem object
* @dir: parent directory
* @dentry: victim
* @delegated_inode: returns victim inode, if the inode is delegated.
*
* The caller must hold dir->i_mutex.
*
* If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
* return a reference to the inode in delegated_inode. The caller
* should then break the delegation on that inode and retry. Because
* breaking a delegation may take a long time, the caller should drop
* dir->i_mutex before doing so.
*
* Alternatively, a caller may pass NULL for delegated_inode. This may
* be appropriate for callers that expect the underlying filesystem not
* to be NFS exported.
*/
int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
{ {
struct inode *target = dentry->d_inode; struct inode *target = dentry->d_inode;
int error = may_delete(dir, dentry, 0); int error = may_delete(dir, dentry, 0);
...@@ -3632,11 +3650,20 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) ...@@ -3632,11 +3650,20 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
else { else {
error = security_inode_unlink(dir, dentry); error = security_inode_unlink(dir, dentry);
if (!error) { if (!error) {
error = break_deleg(target, O_WRONLY|O_NONBLOCK);
if (error) {
if (error == -EWOULDBLOCK && delegated_inode) {
*delegated_inode = target;
ihold(target);
}
goto out;
}
error = dir->i_op->unlink(dir, dentry); error = dir->i_op->unlink(dir, dentry);
if (!error) if (!error)
dont_mount(dentry); dont_mount(dentry);
} }
} }
out:
mutex_unlock(&target->i_mutex); mutex_unlock(&target->i_mutex);
/* We don't d_delete() NFS sillyrenamed files--they still exist. */ /* We don't d_delete() NFS sillyrenamed files--they still exist. */
...@@ -3661,6 +3688,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) ...@@ -3661,6 +3688,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
struct dentry *dentry; struct dentry *dentry;
struct nameidata nd; struct nameidata nd;
struct inode *inode = NULL; struct inode *inode = NULL;
struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0; unsigned int lookup_flags = 0;
retry: retry:
name = user_path_parent(dfd, pathname, &nd, lookup_flags); name = user_path_parent(dfd, pathname, &nd, lookup_flags);
...@@ -3675,7 +3703,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) ...@@ -3675,7 +3703,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
error = mnt_want_write(nd.path.mnt); error = mnt_want_write(nd.path.mnt);
if (error) if (error)
goto exit1; goto exit1;
retry_deleg:
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd); dentry = lookup_hash(&nd);
error = PTR_ERR(dentry); error = PTR_ERR(dentry);
...@@ -3690,13 +3718,21 @@ static long do_unlinkat(int dfd, const char __user *pathname) ...@@ -3690,13 +3718,21 @@ static long do_unlinkat(int dfd, const char __user *pathname)
error = security_path_unlink(&nd.path, dentry); error = security_path_unlink(&nd.path, dentry);
if (error) if (error)
goto exit2; goto exit2;
error = vfs_unlink(nd.path.dentry->d_inode, dentry); error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
exit2: exit2:
dput(dentry); dput(dentry);
} }
mutex_unlock(&nd.path.dentry->d_inode->i_mutex); mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
if (inode) if (inode)
iput(inode); /* truncate the inode here */ iput(inode); /* truncate the inode here */
inode = NULL;
if (delegated_inode) {
error = break_deleg(delegated_inode, O_WRONLY);
iput(delegated_inode);
delegated_inode = NULL;
if (!error)
goto retry_deleg;
}
mnt_drop_write(nd.path.mnt); mnt_drop_write(nd.path.mnt);
exit1: exit1:
path_put(&nd.path); path_put(&nd.path);
......
...@@ -1910,7 +1910,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, ...@@ -1910,7 +1910,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (host_err) if (host_err)
goto out_put; goto out_put;
if (type != S_IFDIR) if (type != S_IFDIR)
host_err = vfs_unlink(dirp, rdentry); host_err = vfs_unlink(dirp, rdentry, NULL);
else else
host_err = vfs_rmdir(dirp, rdentry); host_err = vfs_rmdir(dirp, rdentry);
if (!host_err) if (!host_err)
......
...@@ -1455,7 +1455,7 @@ extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); ...@@ -1455,7 +1455,7 @@ extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
extern int vfs_symlink(struct inode *, struct dentry *, const char *); extern int vfs_symlink(struct inode *, struct dentry *, const char *);
extern int vfs_link(struct dentry *, struct inode *, struct dentry *); extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
/* /*
......
...@@ -886,7 +886,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) ...@@ -886,7 +886,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
err = -ENOENT; err = -ENOENT;
} else { } else {
ihold(inode); ihold(inode);
err = vfs_unlink(dentry->d_parent->d_inode, dentry); err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
} }
dput(dentry); dput(dentry);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册