提交 db9effe9 作者: Linus Torvalds

Merge branch 'vfs-scale-working' of...

Merge branch 'vfs-scale-working' of git://git.kernel.org/pub/scm/linux/kernel/git/npiggin/linux-npiggin

* 'vfs-scale-working' of git://git.kernel.org/pub/scm/linux/kernel/git/npiggin/linux-npiggin:
  fs: fix do_last error case when need_reval_dot
  nfs: add missing rcu-walk check
  fs: hlist UP debug fixup
  fs: fix dropping of rcu-walk from force_reval_path
  fs: force_reval_path drop rcu-walk before d_invalidate
  fs: small rcu-walk documentation fixes

Fixed up trivial conflicts in Documentation/filesystems/porting
...@@ -365,8 +365,8 @@ must be done in the RCU callback. ...@@ -365,8 +365,8 @@ must be done in the RCU callback.
[recommended] [recommended]
vfs now tries to do path walking in "rcu-walk mode", which avoids vfs now tries to do path walking in "rcu-walk mode", which avoids
atomic operations and scalability hazards on dentries and inodes (see atomic operations and scalability hazards on dentries and inodes (see
Documentation/filesystems/path-walk.txt). d_hash and d_compare changes (above) Documentation/filesystems/path-lookup.txt). d_hash and d_compare changes
are examples of the changes required to support this. For more complex (above) are examples of the changes required to support this. For more complex
filesystem callbacks, the vfs drops out of rcu-walk mode before the fs call, so filesystem callbacks, the vfs drops out of rcu-walk mode before the fs call, so
no changes are required to the filesystem. However, this is costly and loses no changes are required to the filesystem. However, this is costly and loses
the benefits of rcu-walk mode. We will begin to add filesystem callbacks that the benefits of rcu-walk mode. We will begin to add filesystem callbacks that
...@@ -383,8 +383,8 @@ Documentation/filesystems/vfs.txt for more details. ...@@ -383,8 +383,8 @@ Documentation/filesystems/vfs.txt for more details.
permission and check_acl are inode permission checks that are called permission and check_acl are inode permission checks that are called
on many or all directory inodes on the way down a path walk (to check for on many or all directory inodes on the way down a path walk (to check for
exec permission). These must now be rcu-walk aware (flags & IPERM_RCU). See exec permission). These must now be rcu-walk aware (flags & IPERM_FLAG_RCU).
Documentation/filesystems/vfs.txt for more details. See Documentation/filesystems/vfs.txt for more details.
-- --
[mandatory] [mandatory]
......
...@@ -415,8 +415,8 @@ otherwise noted. ...@@ -415,8 +415,8 @@ otherwise noted.
permission: called by the VFS to check for access rights on a POSIX-like permission: called by the VFS to check for access rights on a POSIX-like
filesystem. filesystem.
May be called in rcu-walk mode (flags & IPERM_RCU). If in rcu-walk May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk
mode, the filesystem must check the permission without blocking or mode, the filesystem must check the permission without blocking or
storing to the inode. storing to the inode.
If a situation is encountered that rcu-walk cannot handle, return If a situation is encountered that rcu-walk cannot handle, return
......
...@@ -479,6 +479,14 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry ...@@ -479,6 +479,14 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
struct fs_struct *fs = current->fs; struct fs_struct *fs = current->fs;
struct dentry *parent = nd->path.dentry; struct dentry *parent = nd->path.dentry;
/*
* It can be possible to revalidate the dentry that we started
* the path walk with. force_reval_path may also revalidate the
* dentry already committed to the nameidata.
*/
if (unlikely(parent == dentry))
return nameidata_drop_rcu(nd);
BUG_ON(!(nd->flags & LOOKUP_RCU)); BUG_ON(!(nd->flags & LOOKUP_RCU));
if (nd->root.mnt) { if (nd->root.mnt) {
spin_lock(&fs->lock); spin_lock(&fs->lock);
...@@ -583,6 +591,13 @@ void release_open_intent(struct nameidata *nd) ...@@ -583,6 +591,13 @@ void release_open_intent(struct nameidata *nd)
fput(nd->intent.open.file); fput(nd->intent.open.file);
} }
/*
* Call d_revalidate and handle filesystems that request rcu-walk
* to be dropped. This may be called and return in rcu-walk mode,
* regardless of success or error. If -ECHILD is returned, the caller
* must return -ECHILD back up the path walk stack so path walk may
* be restarted in ref-walk mode.
*/
static int d_revalidate(struct dentry *dentry, struct nameidata *nd) static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
{ {
int status; int status;
...@@ -673,6 +688,9 @@ force_reval_path(struct path *path, struct nameidata *nd) ...@@ -673,6 +688,9 @@ force_reval_path(struct path *path, struct nameidata *nd)
return 0; return 0;
if (!status) { if (!status) {
/* Don't d_invalidate in rcu-walk mode */
if (nameidata_drop_rcu(nd))
return -ECHILD;
d_invalidate(dentry); d_invalidate(dentry);
status = -ESTALE; status = -ESTALE;
} }
...@@ -2105,11 +2123,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, ...@@ -2105,11 +2123,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
dir = nd->path.dentry; dir = nd->path.dentry;
case LAST_DOT: case LAST_DOT:
if (need_reval_dot(dir)) { if (need_reval_dot(dir)) {
error = d_revalidate(nd->path.dentry, nd); int status = d_revalidate(nd->path.dentry, nd);
if (!error) if (!status)
error = -ESTALE; status = -ESTALE;
if (error < 0) if (status < 0) {
error = status;
goto exit; goto exit;
}
} }
/* fallthrough */ /* fallthrough */
case LAST_ROOT: case LAST_ROOT:
......
...@@ -1406,11 +1406,15 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry ...@@ -1406,11 +1406,15 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
{ {
struct dentry *parent = NULL; struct dentry *parent = NULL;
struct inode *inode = dentry->d_inode; struct inode *inode;
struct inode *dir; struct inode *dir;
struct nfs_open_context *ctx; struct nfs_open_context *ctx;
int openflags, ret = 0; int openflags, ret = 0;
if (nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = dentry->d_inode;
if (!is_atomic_open(nd) || d_mountpoint(dentry)) if (!is_atomic_open(nd) || d_mountpoint(dentry))
goto no_open; goto no_open;
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
* some fast and compact auxiliary data. * some fast and compact auxiliary data.
*/ */
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) #if defined(CONFIG_SMP)
#define LIST_BL_LOCKMASK 1UL #define LIST_BL_LOCKMASK 1UL
#else #else
#define LIST_BL_LOCKMASK 0UL #define LIST_BL_LOCKMASK 0UL
...@@ -62,7 +62,8 @@ static inline void hlist_bl_set_first(struct hlist_bl_head *h, ...@@ -62,7 +62,8 @@ static inline void hlist_bl_set_first(struct hlist_bl_head *h,
struct hlist_bl_node *n) struct hlist_bl_node *n)
{ {
LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK)); LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) !=
LIST_BL_LOCKMASK);
h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK); h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK);
} }
......
...@@ -11,7 +11,8 @@ static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h, ...@@ -11,7 +11,8 @@ static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h,
struct hlist_bl_node *n) struct hlist_bl_node *n)
{ {
LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK)); LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) !=
LIST_BL_LOCKMASK);
rcu_assign_pointer(h->first, rcu_assign_pointer(h->first,
(struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK)); (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK));
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册