From 59eda0e07f43c950d31756213b607af673e551f0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 10 Jan 2015 17:53:21 -0500
Subject: [PATCH] new fs_pin killing logics

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fs_pin.c                   | 54 ++++++++++++++++++++---
 include/linux/fs_pin.h        | 13 +++++-
 include/linux/pid_namespace.h |  4 +-
 kernel/acct.c                 | 81 +++++++++++++++--------------------
 4 files changed, 96 insertions(+), 56 deletions(-)

diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index 50ef7d2ef03c..0c77bdc238b2 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -1,4 +1,5 @@
 #include <linux/fs.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/fs_pin.h>
 #include "internal.h"
@@ -12,6 +13,10 @@ void pin_remove(struct fs_pin *pin)
 	hlist_del(&pin->m_list);
 	hlist_del(&pin->s_list);
 	spin_unlock(&pin_lock);
+	spin_lock_irq(&pin->wait.lock);
+	pin->done = 1;
+	wake_up_locked(&pin->wait);
+	spin_unlock_irq(&pin->wait.lock);
 }
 
 void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p)
@@ -28,19 +33,58 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m)
 	pin_insert_group(pin, m, &m->mnt_sb->s_pins);
 }
 
+void pin_kill(struct fs_pin *p)	/* kill @p, or wait for a kill already under way */
+{	/* entered under rcu_read_lock(); every return path has dropped it */
+	wait_queue_t wait;
+
+	if (!p) {	/* nothing to kill */
+		rcu_read_unlock();
+		return;
+	}
+	init_wait(&wait);
+	spin_lock_irq(&p->wait.lock);	/* ->done is read and written under wait.lock */
+	if (likely(!p->done)) {	/* first killer: claim the pin and run ->kill() */
+		p->done = -1;	/* later callers see a kill in progress */
+		spin_unlock_irq(&p->wait.lock);
+		rcu_read_unlock();
+		p->kill(p);
+		return;
+	}
+	if (p->done > 0) {	/* pin_remove() has already run */
+		spin_unlock_irq(&p->wait.lock);
+		rcu_read_unlock();
+		return;
+	}
+	__add_wait_queue(&p->wait, &wait);	/* done < 0: wait for pin_remove() */
+	while (1) {
+		set_current_state(TASK_UNINTERRUPTIBLE);	/* still holding wait.lock, which pin_remove() takes to wake us */
+		spin_unlock_irq(&p->wait.lock);
+		rcu_read_unlock();
+		schedule();
+		rcu_read_lock();
+		if (likely(list_empty(&wait.task_list)))	/* presumably dequeued by the wakeup (init_wait() autoremove) - confirm */
+			break;
+		/* OK, we know p couldn't have been freed yet */
+		spin_lock_irq(&p->wait.lock);
+		if (p->done > 0) {	/* pin_remove() finished */
+			spin_unlock_irq(&p->wait.lock);
+			break;
+		}
+	}
+	rcu_read_unlock();
+}
+
 void mnt_pin_kill(struct mount *m)
 {
 	while (1) {
 		struct hlist_node *p;
-		struct fs_pin *pin;
 		rcu_read_lock();
 		p = ACCESS_ONCE(m->mnt_pins.first);
 		if (!p) {
 			rcu_read_unlock();
 			break;
 		}
-		pin = hlist_entry(p, struct fs_pin, m_list);
-		pin->kill(pin);
+		pin_kill(hlist_entry(p, struct fs_pin, m_list));	/* pin_kill() drops the rcu_read_lock() taken above */
 	}
 }
 
@@ -48,14 +92,12 @@ void group_pin_kill(struct hlist_head *p)
 {
 	while (1) {
 		struct hlist_node *q;
-		struct fs_pin *pin;
 		rcu_read_lock();
 		q = ACCESS_ONCE(p->first);
 		if (!q) {
 			rcu_read_unlock();
 			break;
 		}
-		pin = hlist_entry(q, struct fs_pin, s_list);
-		pin->kill(pin);
+		pin_kill(hlist_entry(q, struct fs_pin, s_list));
 	}
 }
diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h
index 2be38d1464ae..9dc4e0384bfb 100644
--- a/include/linux/fs_pin.h
+++ b/include/linux/fs_pin.h
@@ -1,11 +1,22 @@
-#include <linux/fs.h>
+#include <linux/wait.h>
 
 struct fs_pin {
+	wait_queue_head_t	wait;	/* pin_kill() sleeps here; pin_remove() wakes it; wait.lock guards ->done */
+	int			done;	/* 0: not killed yet, <0: ->kill() in progress, >0: pin_remove() ran */
 	struct hlist_node	s_list;
 	struct hlist_node	m_list;
 	void (*kill)(struct fs_pin *);
 };
 
+struct vfsmount;
+
+static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *))
+{
+	init_waitqueue_head(&p->wait);	/* NOTE(review): ->done and the list nodes are not set here; acct_on() passes kzalloc()ed memory */
+	p->kill = kill;	/* called by pin_kill() for the first killer of this pin */
+}
+
 void pin_remove(struct fs_pin *);
 void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *);
 void pin_insert(struct fs_pin *, struct vfsmount *);
+void pin_kill(struct fs_pin *);
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index b9cf6c51b181..918b117a7cd3 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -19,7 +19,7 @@ struct pidmap {
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
 #define PIDMAP_ENTRIES		((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE)
 
-struct bsd_acct_struct;
+struct fs_pin;
 
 struct pid_namespace {
 	struct kref kref;
@@ -37,7 +37,7 @@ struct pid_namespace {
 	struct dentry *proc_thread_self;
 #endif
 #ifdef CONFIG_BSD_PROCESS_ACCT
-	struct bsd_acct_struct *bacct;
+	struct fs_pin *bacct;
 #endif
 	struct user_namespace *user_ns;
 	struct work_struct proc_work;
diff --git a/kernel/acct.c b/kernel/acct.c
index cf6588ab517b..e6c10d1a4058 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -76,7 +76,6 @@ int acct_parm[3] = {4, 2, 30};
 /*
  * External references and all of the globals.
  */
-static void do_acct_process(struct bsd_acct_struct *acct);
 
 struct bsd_acct_struct {
 	struct fs_pin		pin;
@@ -91,6 +90,8 @@ struct bsd_acct_struct {
 	struct completion	done;
 };
 
+static void do_acct_process(struct bsd_acct_struct *acct);
+
 /*
  * Check the amount of free space and suspend/resume accordingly.
  */
@@ -132,13 +133,18 @@ static void acct_put(struct bsd_acct_struct *p)
 		kfree_rcu(p, rcu);
 }
 
+static inline struct bsd_acct_struct *to_acct(struct fs_pin *p)
+{
+	return p ? container_of(p, struct bsd_acct_struct, pin) : NULL;	/* NULL-preserving: acct_get() tests the result of to_acct(ns->bacct) for NULL */
+}
+
 static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
 {
 	struct bsd_acct_struct *res;
 again:
 	smp_rmb();
 	rcu_read_lock();
-	res = ACCESS_ONCE(ns->bacct);
+	res = to_acct(ACCESS_ONCE(ns->bacct));
 	if (!res) {
 		rcu_read_unlock();
 		return NULL;
@@ -150,7 +156,7 @@ static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
 	}
 	rcu_read_unlock();
 	mutex_lock(&res->lock);
-	if (!res->ns) {
+	if (res != to_acct(ACCESS_ONCE(ns->bacct))) {
 		mutex_unlock(&res->lock);
 		acct_put(res);
 		goto again;
@@ -158,6 +164,19 @@ static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
 	return res;
 }
 
+static void acct_pin_kill(struct fs_pin *pin)	/* ->kill() callback installed by acct_on() via init_fs_pin() */
+{
+	struct bsd_acct_struct *acct = to_acct(pin);
+	mutex_lock(&acct->lock);
+	do_acct_process(acct);
+	schedule_work(&acct->work);	/* runs close_work(); INIT_WORK is done once in acct_on() */
+	wait_for_completion(&acct->done);	/* close_work() signals this with complete() */
+	cmpxchg(&acct->ns->bacct, pin, NULL);	/* clear ns->bacct only if it still points at this pin */
+	mutex_unlock(&acct->lock);
+	pin_remove(pin);	/* sets pin->done = 1 and wakes anyone sleeping in pin_kill() */
+	acct_put(acct);
+}
+
 static void close_work(struct work_struct *work)
 {
 	struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
@@ -168,49 +187,13 @@ static void close_work(struct work_struct *work)
 	complete(&acct->done);
 }
 
-static void acct_kill(struct bsd_acct_struct *acct)
-{
-	if (acct) {
-		struct pid_namespace *ns = acct->ns;
-		do_acct_process(acct);
-		INIT_WORK(&acct->work, close_work);
-		init_completion(&acct->done);
-		schedule_work(&acct->work);
-		wait_for_completion(&acct->done);
-		pin_remove(&acct->pin);
-		cmpxchg(&ns->bacct, acct, NULL);
-		acct->ns = NULL;
-		atomic_long_dec(&acct->count);
-		mutex_unlock(&acct->lock);
-		acct_put(acct);
-	}
-}
-
-static void acct_pin_kill(struct fs_pin *pin)
-{
-	struct bsd_acct_struct *acct;
-	acct = container_of(pin, struct bsd_acct_struct, pin);
-	if (!atomic_long_inc_not_zero(&acct->count)) {
-		rcu_read_unlock();
-		cpu_relax();
-		return;
-	}
-	rcu_read_unlock();
-	mutex_lock(&acct->lock);
-	if (!acct->ns) {
-		mutex_unlock(&acct->lock);
-		acct_put(acct);
-		acct = NULL;
-	}
-	acct_kill(acct);
-}
-
 static int acct_on(struct filename *pathname)
 {
 	struct file *file;
 	struct vfsmount *mnt, *internal;
 	struct pid_namespace *ns = task_active_pid_ns(current);
-	struct bsd_acct_struct *acct, *old;
+	struct bsd_acct_struct *acct;
+	struct fs_pin *old;
 	int err;
 
 	acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
@@ -252,18 +235,20 @@ static int acct_on(struct filename *pathname)
 	file->f_path.mnt = internal;
 
 	atomic_long_set(&acct->count, 1);
-	acct->pin.kill = acct_pin_kill;
+	init_fs_pin(&acct->pin, acct_pin_kill);
 	acct->file = file;
 	acct->needcheck = jiffies;
 	acct->ns = ns;
 	mutex_init(&acct->lock);
+	INIT_WORK(&acct->work, close_work);
+	init_completion(&acct->done);
 	mutex_lock_nested(&acct->lock, 1);	/* nobody has seen it yet */
 	pin_insert(&acct->pin, mnt);
 
-	old = acct_get(ns);
-	ns->bacct = acct;
-	acct_kill(old);
+	rcu_read_lock();
+	old = xchg(&ns->bacct, &acct->pin);
 	mutex_unlock(&acct->lock);
+	pin_kill(old);
 	mnt_drop_write(mnt);
 	mntput(mnt);
 	return 0;
@@ -299,7 +284,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
 		mutex_unlock(&acct_on_mutex);
 		putname(tmp);
 	} else {
-		acct_kill(acct_get(task_active_pid_ns(current)));
+		rcu_read_lock();
+		pin_kill(task_active_pid_ns(current)->bacct);
 	}
 
 	return error;
@@ -307,7 +293,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
 
 void acct_exit_ns(struct pid_namespace *ns)
 {
-	acct_kill(acct_get(ns));
+	rcu_read_lock();	/* pin_kill() is entered under RCU and drops it itself */
+	pin_kill(ns->bacct);	/* a NULL ->bacct is fine: pin_kill() just unlocks and returns */
 }
 
 /*
-- 
GitLab