Commit ea5b5eee authored by Wang Wensheng, committed by Zheng Zengkai

share_pool: support fork() and exit() to handle the mm

ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4NDAW
CVE: NA

-------------------

fork() creates a new mm for the new process. The new mm must not inherit any
share_pool information from the parent process, so that state has to be
cleaned.

exit() calls mmput() on the mm and frees the memory. If the mm is already
used by an sp_group, the group must be cleaned up first.

Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Signed-off-by: Peng Wu <wupeng58@huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Parent dc95c861
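Note that the hunks shown below cover the exit() side plus the exit-path
hooks; the fork()-side cleanup the message describes is not visible in this
excerpt. As a rough illustration only, such cleanup amounts to resetting the
mm's share_pool state when a new mm is set up. The helper name sp_init_mm()
and its placement in the fork path are assumptions of this sketch, not taken
from the patch:

    /*
     * Illustrative sketch only: a freshly created mm must not inherit
     * share_pool state from its parent. sp_group_master is the per-mm
     * field used by the share_pool code in this patch; clearing it from
     * the mm setup path of fork() is an assumption of this sketch.
     */
    static inline void sp_init_mm(struct mm_struct *mm)
    {
    	if (sp_is_enabled())
    		mm->sp_group_master = NULL;	/* never inherited across fork() */
    }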
@@ -258,6 +258,8 @@ extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id);
 extern int sp_group_add_task(int pid, int spg_id);
 extern void sp_area_drop(struct vm_area_struct *vma);
+extern int sp_group_exit(struct mm_struct *mm);
+extern void sp_group_post_exit(struct mm_struct *mm);
 
 static inline bool sp_is_enabled(void)
 {
......
@@ -98,6 +98,7 @@
 #include <linux/io_uring.h>
+#include <linux/share_pool.h>
 
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1092,6 +1093,9 @@ static inline void __mmput(struct mm_struct *mm)
 	ksm_exit(mm);
 	khugepaged_exit(mm); /* must run before exit_mmap */
 	exit_mmap(mm);
+
+	sp_group_post_exit(mm);
+
 	mm_put_huge_zero_page(mm);
 	set_mm_exe_file(mm, NULL);
 	if (!list_empty(&mm->mmlist)) {
@@ -1111,6 +1115,9 @@ void mmput(struct mm_struct *mm)
 {
 	might_sleep();
 
+	if (sp_group_exit(mm))
+		return;
+
 	if (atomic_dec_and_test(&mm->mm_users))
 		__mmput(mm);
 }
......
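The sp_group_exit() return convention above leans on a reference-counting
invariant established in sp_group_add_task(): each group a process joins
holds one extra mm_users reference. A worked example with assumed numbers: a
single-threaded process that joined two groups runs with master->count == 2
and mm_users == 1 + 2 == 3. At exit, mmput() calls sp_group_exit(), which
sees mm_users == master->count + 1, detaches the process from both groups,
subtracts the two group references, and returns 0, so mmput() drops the final
reference and enters __mmput(). If another thread were still holding the mm
(mm_users == 4), sp_group_exit() would drop only the caller's reference and
return 1, making mmput() return immediately. See need_free_sp_group() in the
share_pool hunk below.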
@@ -183,6 +183,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	mpol_put(vma_policy(vma));
+	sp_area_drop(vma);
 	vm_area_free(vma);
 	return next;
 }
@@ -1174,6 +1175,10 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 	if (vm_flags & VM_SPECIAL)
 		return NULL;
 
+	/* don't merge this kind of vma as sp_area couldn't be merged */
+	if (sp_check_vm_share_pool(vm_flags))
+		return NULL;
+
 	next = vma_next(mm, prev);
 	area = next;
 	if (area && area->vm_end == end)		/* cases 6, 7, 8 */
......
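vma_merge() bails out early for share-pool VMAs because each sp_area carries
its own metadata that cannot survive a merge. A minimal sketch of what such a
predicate looks like, assuming a dedicated vm_flags bit here called
VM_SHARE_POOL (the bit name is an assumption; the real helper is declared in
share_pool.h):

    static inline bool sp_check_vm_share_pool(unsigned long vm_flags)
    {
    	/* sketch: treat any VMA carrying the share-pool flag as unmergeable */
    	return sp_is_enabled() && (vm_flags & VM_SHARE_POOL);
    }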
@@ -4139,6 +4139,134 @@ static void __init proc_sharepool_init(void)
 
 /*** End of statistical and maintenance functions ***/
 
+#define MM_WOULD_FREE	1
+
+/*
+ * Recall we add mm->mm_users by 1 deliberately in sp_group_add_task().
+ * If mm_users == sp_group_master->count + 1, it means that the mm is ready
+ * to be freed because the last owner of this mm is in the exiting procedure:
+ * do_exit() -> exit_mm() -> mmput() -> sp_group_exit() -> THIS function.
+ */
+static bool need_free_sp_group(struct mm_struct *mm,
+			       struct sp_group_master *master)
+{
+	/* thread exits but process is still alive */
+	if ((unsigned int)atomic_read(&mm->mm_users) != master->count + MM_WOULD_FREE) {
+		if (atomic_dec_and_test(&mm->mm_users))
+			WARN(1, "Invalid user counting\n");
+		return false;
+	}
+
+	return true;
+}
+/*
+ * Return:
+ * 1	- let mmput() return immediately
+ * 0	- let mmput() decrease mm_users and try __mmput()
+ */
+int sp_group_exit(struct mm_struct *mm)
+{
+	struct sp_group *spg;
+	struct sp_group_master *master;
+	struct sp_group_node *spg_node, *tmp;
+	bool is_alive = true;
+
+	if (!sp_is_enabled())
+		return 0;
+
+	down_write(&sp_group_sem);
+
+	master = mm->sp_group_master;
+	if (!master) {
+		up_write(&sp_group_sem);
+		return 0;
+	}
+
+	if (!need_free_sp_group(mm, master)) {
+		up_write(&sp_group_sem);
+		return 1;
+	}
+
+	list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) {
+		spg = spg_node->spg;
+
+		down_write(&spg->rw_lock);
+		/* a dead group should NOT be reactivated */
+		if (spg_valid(spg) && list_is_singular(&spg->procs))
+			is_alive = spg->is_alive = false;
+		spg->proc_num--;
+		list_del(&spg_node->proc_node);
+		up_write(&spg->rw_lock);
+
+		if (!is_alive)
+			blocking_notifier_call_chain(&sp_notifier_chain, 0,
+						     spg);
+	}
+
+	/* match with get_task_mm() in sp_group_add_task() */
+	if (atomic_sub_and_test(master->count, &mm->mm_users)) {
+		up_write(&sp_group_sem);
+		WARN(1, "Invalid user counting\n");
+		return 1;
+	}
+
+	up_write(&sp_group_sem);
+	return 0;
+}
+void sp_group_post_exit(struct mm_struct *mm)
+{
+	struct sp_proc_stat *stat;
+	long alloc_size, k2u_size;
+	/* lockless visit */
+	struct sp_group_master *master = mm->sp_group_master;
+	struct sp_group_node *spg_node, *tmp;
+	struct sp_group *spg;
+
+	if (!sp_is_enabled() || !master)
+		return;
+
+	/*
+	 * There are two basic scenarios when a process in the share pool is
+	 * exiting but its share pool memory usage is not 0.
+	 * 1. Process A called sp_alloc(), but it terminates without calling
+	 *    sp_free(). Then its share pool memory usage is a positive number.
+	 * 2. Process A never called sp_alloc(), and process B in the same spg
+	 *    called sp_alloc() to get an addr u. Then A gets u somehow and
+	 *    calls sp_free(u). Now A's share pool memory usage is a negative
+	 *    number. Notice B's memory usage will be a positive number.
+	 *
+	 * We decided to print an info message in both scenarios.
+	 *
+	 * A process not in an sp group doesn't need to print because there
+	 * won't be any memory which is not freed.
+	 */
+	stat = sp_get_proc_stat(mm);
+	if (stat) {
+		alloc_size = atomic64_read(&stat->alloc_size);
+		k2u_size = atomic64_read(&stat->k2u_size);
+
+		if (alloc_size != 0 || k2u_size != 0)
+			pr_info("process %s(%d) exits. It applied %ld aligned KB, k2u shared %ld aligned KB\n",
+				stat->comm, stat->tgid,
+				byte2kb(alloc_size), byte2kb(k2u_size));
+
+		/* match with sp_init_proc_stat, we expect stat is released after this call */
+		sp_proc_stat_drop(stat);
+	}
+
+	/* lockless traverse */
+	list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) {
+		spg = spg_node->spg;
+		/* match with refcount inc in sp_group_add_task() */
+		sp_group_drop(spg);
+		kfree(spg_node);
+	}
+
+	kfree(master);
+}
 
 DEFINE_STATIC_KEY_FALSE(share_pool_enabled_key);
 
 static int __init enable_share_pool(char *s)
......
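Taken together, the teardown order on the last mmput() is: sp_group_exit()
detaches the mm from its groups and drops the per-group mm_users references;
__mmput() then runs exit_mmap(), which releases each share-pool area via
sp_area_drop(); finally sp_group_post_exit() reports leaked share-pool memory
and frees the group bookkeeping. The blocking_notifier_call_chain() call in
sp_group_exit() lets other kernel code observe a group dying. A hypothetical
consumer might look like the sketch below; the callback body and the spg->id
field are assumptions for illustration, while the chain name and the event
value 0 come from the patch (needs <linux/notifier.h>):

    static int sp_group_dead_cb(struct notifier_block *nb,
    			    unsigned long action, void *data)
    {
    	struct sp_group *spg = data;

    	/* the patch passes action == 0 and the dying group as data */
    	pr_debug("share_pool: group %d is dead\n", spg->id);
    	return NOTIFY_OK;
    }

    static struct notifier_block sp_group_dead_nb = {
    	.notifier_call = sp_group_dead_cb,
    };

    /* registration, e.g. from a driver's init path */
    blocking_notifier_chain_register(&sp_notifier_chain, &sp_group_dead_nb);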