提交 ace0c791 编写于 作者: E Eric W. Biederman

proc/sysctl: Don't grab i_lock under sysctl_lock.

Konstantin Khlebnikov <khlebnikov@yandex-team.ru> writes:
> This patch has locking problem. I've got lockdep splat under LTP.
>
> [ 6633.115456] ======================================================
> [ 6633.115502] [ INFO: possible circular locking dependency detected ]
> [ 6633.115553] 4.9.10-debug+ #9 Tainted: G             L
> [ 6633.115584] -------------------------------------------------------
> [ 6633.115627] ksm02/284980 is trying to acquire lock:
> [ 6633.115659]  (&sb->s_type->i_lock_key#4){+.+...}, at: [<ffffffff816bc1ce>] igrab+0x1e/0x80
> [ 6633.115834] but task is already holding lock:
> [ 6633.115882]  (sysctl_lock){+.+...}, at: [<ffffffff817e379b>] unregister_sysctl_table+0x6b/0x110
> [ 6633.116026] which lock already depends on the new lock.
> [ 6633.116026]
> [ 6633.116080]
> [ 6633.116080] the existing dependency chain (in reverse order) is:
> [ 6633.116117]
> -> #2 (sysctl_lock){+.+...}:
> -> #1 (&(&dentry->d_lockref.lock)->rlock){+.+...}:
> -> #0 (&sb->s_type->i_lock_key#4){+.+...}:
>
> d_lock nests inside i_lock
> sysctl_lock nests inside d_lock in d_compare
>
> This patch adds i_lock nesting inside sysctl_lock.

Al Viro <viro@ZenIV.linux.org.uk> replied:
> Once ->unregistering is set, you can drop sysctl_lock just fine.  So I'd
> try something like this - use rcu_read_lock() in proc_sys_prune_dcache(),
> drop sysctl_lock() before it and regain after.  Make sure that no inodes
> are added to the list once ->unregistering has been set and use RCU list
> primitives for modifying the inode list, with sysctl_lock still used to
> serialize its modifications.
>
> Freeing struct inode is RCU-delayed (see proc_destroy_inode()), so doing
> igrab() is safe there.  Since we don't drop inode reference until after we'd
> passed beyond it in the list, list_for_each_entry_rcu() should be fine.

I agree with Al Viro's analysis of the situation.

Fixes: d6cffbbe ("proc/sysctl: prune stale dentries during unregistering")
Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Tested-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
上级 fea6d2a6
...@@ -266,21 +266,19 @@ static void proc_sys_prune_dcache(struct ctl_table_header *head) ...@@ -266,21 +266,19 @@ static void proc_sys_prune_dcache(struct ctl_table_header *head)
struct inode *inode, *prev = NULL; struct inode *inode, *prev = NULL;
struct proc_inode *ei; struct proc_inode *ei;
list_for_each_entry(ei, &head->inodes, sysctl_inodes) { rcu_read_lock();
list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
inode = igrab(&ei->vfs_inode); inode = igrab(&ei->vfs_inode);
if (inode) { if (inode) {
spin_unlock(&sysctl_lock); rcu_read_unlock();
iput(prev); iput(prev);
prev = inode; prev = inode;
d_prune_aliases(inode); d_prune_aliases(inode);
spin_lock(&sysctl_lock); rcu_read_lock();
} }
} }
if (prev) { rcu_read_unlock();
spin_unlock(&sysctl_lock); iput(prev);
iput(prev);
spin_lock(&sysctl_lock);
}
} }
/* called under sysctl_lock, will reacquire if has to wait */ /* called under sysctl_lock, will reacquire if has to wait */
...@@ -296,10 +294,10 @@ static void start_unregistering(struct ctl_table_header *p) ...@@ -296,10 +294,10 @@ static void start_unregistering(struct ctl_table_header *p)
p->unregistering = &wait; p->unregistering = &wait;
spin_unlock(&sysctl_lock); spin_unlock(&sysctl_lock);
wait_for_completion(&wait); wait_for_completion(&wait);
spin_lock(&sysctl_lock);
} else { } else {
/* anything non-NULL; we'll never dereference it */ /* anything non-NULL; we'll never dereference it */
p->unregistering = ERR_PTR(-EINVAL); p->unregistering = ERR_PTR(-EINVAL);
spin_unlock(&sysctl_lock);
} }
/* /*
* Prune dentries for unregistered sysctls: namespaced sysctls * Prune dentries for unregistered sysctls: namespaced sysctls
...@@ -310,6 +308,7 @@ static void start_unregistering(struct ctl_table_header *p) ...@@ -310,6 +308,7 @@ static void start_unregistering(struct ctl_table_header *p)
* do not remove from the list until nobody holds it; walking the * do not remove from the list until nobody holds it; walking the
* list in do_sysctl() relies on that. * list in do_sysctl() relies on that.
*/ */
spin_lock(&sysctl_lock);
erase_header(p); erase_header(p);
} }
...@@ -455,11 +454,17 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, ...@@ -455,11 +454,17 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
inode->i_ino = get_next_ino(); inode->i_ino = get_next_ino();
ei = PROC_I(inode); ei = PROC_I(inode);
ei->sysctl = head;
ei->sysctl_entry = table;
spin_lock(&sysctl_lock); spin_lock(&sysctl_lock);
list_add(&ei->sysctl_inodes, &head->inodes); if (unlikely(head->unregistering)) {
spin_unlock(&sysctl_lock);
iput(inode);
inode = NULL;
goto out;
}
ei->sysctl = head;
ei->sysctl_entry = table;
list_add_rcu(&ei->sysctl_inodes, &head->inodes);
head->count++; head->count++;
spin_unlock(&sysctl_lock); spin_unlock(&sysctl_lock);
...@@ -487,7 +492,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, ...@@ -487,7 +492,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
{ {
spin_lock(&sysctl_lock); spin_lock(&sysctl_lock);
list_del(&PROC_I(inode)->sysctl_inodes); list_del_rcu(&PROC_I(inode)->sysctl_inodes);
if (!--head->count) if (!--head->count)
kfree_rcu(head, rcu); kfree_rcu(head, rcu);
spin_unlock(&sysctl_lock); spin_unlock(&sysctl_lock);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册