Commit f3104594 authored by Tang Yizhou, committed by Yang Yingliang

share_pool: Extract sp_alloc_mmap_populate

ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA

-------------------------------------------------

Refactor sp_alloc to improve its readability.

Extract sp_alloc_mmap_populate, which consists of sp_alloc_mmap and
sp_alloc_populate.
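
For orientation, the refactored control flow condenses to the sketch
below (assembled from the diff that follows; locking, statistics and
error paths are elided, so treat it as a sketch rather than the
literal code):

    void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
    {
            struct sp_alloc_context ac;
            struct sp_area *spa;
            int ret;

            ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac);
            if (ret)
                    return ERR_PTR(ret);
    try_again:
            spa = sp_alloc_area(ac.size_aligned, ac.sp_flags, ac.spg,
                                SPA_TYPE_ALLOC, current->tgid);
            /*
             * For each process in the group, sp_alloc_mmap() creates the
             * mapping and sp_alloc_populate() faults it in; a hugepage
             * failure sets ac.state = ALLOC_RETRY for a small-page retry.
             */
            ret = sp_alloc_mmap_populate(spa, &ac);
            if (ret && ac.state == ALLOC_RETRY)
                    goto try_again;
            ...
    }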
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Parent 48cbb927
@@ -58,6 +58,8 @@ extern bool vmap_allow_huge;
struct sp_spg_stat {
int spg_id;
/* record the number of hugepage allocation failures */
atomic_t hugepage_failures;
/* number of sp_area */
atomic_t spa_num;
/* total size of all sp_area from sp_alloc and k2u */
@@ -98,8 +100,6 @@ struct sp_spg_stat {
*/
struct sp_group {
int id;
/* record the number of hugepage allocation failures */
int hugepage_failures;
struct file *file;
struct file *file_hugetlb;
/* number of process in this group */
......
@@ -425,6 +425,7 @@ static struct sp_spg_stat *create_spg_stat(int spg_id)
}
stat->spg_id = spg_id;
atomic_set(&stat->hugepage_failures, 0);
atomic_set(&stat->spa_num, 0);
atomic64_set(&stat->size, 0);
atomic64_set(&stat->alloc_nsize, 0);
@@ -878,7 +879,6 @@ static struct sp_group *create_spg(int spg_id)
spg->id = spg_id;
spg->is_alive = true;
spg->proc_num = 0;
spg->hugepage_failures = 0;
spg->dvpp_multi_spaces = false;
spg->owner = current->group_leader;
atomic_set(&spg->use_count, 1);
@@ -1831,6 +1831,7 @@ struct sp_alloc_context {
unsigned long sp_flags;
unsigned long populate;
int state;
bool need_fallocate;
};
static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
@@ -1915,91 +1916,101 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
ac->size = size;
ac->sp_flags = sp_flags;
ac->state = ALLOC_NORMAL;
ac->need_fallocate = false;
return 0;
}
/**
* sp_alloc() - Allocate shared memory for all the processes in a sp_group.
* @size: the size of memory to allocate.
* @sp_flags: how to allocate the memory.
* @spg_id: the share group that the memory is allocated to.
*
* Use pass through allocation if spg_id == SPG_ID_DEFAULT in multi-group mode.
*
* Return:
* * on success, return the starting kernel address of the shared memory.
* * on failure, return an ERR_PTR encoding -errno.
*/
void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
static void sp_alloc_unmap(struct mm_struct *mm, struct sp_area *spa,
struct sp_group_node *spg_node)
{
struct sp_group *spg;
struct sp_area *spa = NULL;
unsigned long sp_addr;
unsigned long mmap_addr;
void *p; /* return value */
struct mm_struct *mm;
struct file *file;
unsigned long size_aligned;
int ret = 0;
unsigned int noreclaim_flag;
struct sp_group_node *spg_node;
struct sp_alloc_context ac;
ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac);
if (ret)
return ERR_PTR(ret);
try_again:
spa = sp_alloc_area(ac.size_aligned, ac.sp_flags, ac.spg,
SPA_TYPE_ALLOC, current->tgid);
if (IS_ERR(spa)) {
pr_err_ratelimited("alloc spa failed in allocation"
"(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa));
ret = PTR_ERR(spa);
goto out;
}
sp_addr = spa->va_start;
__sp_free(spa->spg, spa->va_start, spa->real_size, mm);
}
/* create mapping for each process in the group */
list_for_each_entry(spg_node, &spg->procs, proc_node) {
static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa,
struct sp_group_node *spg_node, struct sp_alloc_context *ac)
{
int ret = 0;
unsigned long mmap_addr;
unsigned long prot;
unsigned long sp_addr = spa->va_start;
unsigned long populate = 0;
struct vm_area_struct *vma;
mm = spg_node->master->mm;
down_write(&mm->mmap_sem);
if (unlikely(mm->core_state)) {
up_write(&mm->mmap_sem);
sp_alloc_unmap(mm, spa, spg_node);
ac->state = ALLOC_NOMEM;
pr_info("allocation encountered coredump\n");
continue;
return -EFAULT;
}
mmap_addr = sp_mmap(mm, file, spa, &populate, spg_node->prot);
prot = spg_node->prot;
/* when success, mmap_addr == spa->va_start */
mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot);
if (IS_ERR_VALUE(mmap_addr)) {
up_write(&mm->mmap_sem);
p = (void *)mmap_addr;
__sp_free(spg, sp_addr, size_aligned, mm);
sp_alloc_unmap(mm, spa, spg_node);
pr_err("sp mmap in allocation failed %ld\n", mmap_addr);
goto out;
return PTR_ERR((void *)mmap_addr);
}
p = (void *)mmap_addr; /* success */
if (populate == 0) {
if (unlikely(populate == 0)) {
up_write(&mm->mmap_sem);
continue;
pr_err("allocation sp mmap populate failed\n");
ret = -EFAULT;
goto unmap;
}
ac->populate = populate;
vma = find_vma(mm, sp_addr);
if (unlikely(!vma)) {
up_write(&mm->mmap_sem);
pr_debug("allocation failed, can't find %lx vma\n", (unsigned long)sp_addr);
p = ERR_PTR(-EINVAL);
goto out;
WARN(1, "allocation failed, can't find %lx vma\n", sp_addr);
ret = -EINVAL;
goto unmap;
}
/* clear the PTE_RDONLY flag, otherwise an SMMU event may be triggered */
if (spg_node->prot & PROT_WRITE)
if (prot & PROT_WRITE)
vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY);
up_write(&mm->mmap_sem);
return ret;
unmap:
sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node);
return ret;
}
static void sp_alloc_fallback(struct sp_area *spa, struct sp_alloc_context *ac)
{
struct sp_spg_stat *stat = ac->spg->stat;
if (ac->file == ac->spg->file) {
ac->state = ALLOC_NOMEM;
return;
}
atomic_inc(&stat->hugepage_failures);
if (!(ac->sp_flags & SP_HUGEPAGE_ONLY)) {
ac->file = ac->spg->file;
ac->size_aligned = ALIGN(ac->size, PAGE_SIZE);
ac->sp_flags &= ~SP_HUGEPAGE;
ac->state = ALLOC_RETRY;
__sp_area_drop(spa);
return;
}
ac->state = ALLOC_NOMEM;
}
static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa,
struct sp_group_node *spg_node, struct sp_alloc_context *ac)
{
int ret = 0;
unsigned long sp_addr = spa->va_start;
unsigned int noreclaim_flag = 0;
/*
* The direct reclaim and compact may take a long
* time. As a result, the sp mutex would be held for too
@@ -2020,38 +2031,100 @@ void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
* page fault later on, and more importantly sp_make_share_u2k()
* depends on this feature (and MAP_LOCKED) to work correctly.
*/
ret = do_mm_populate(mm, sp_addr, populate, 0);
ret = do_mm_populate(mm, sp_addr, ac->populate, 0);
if (spa->is_hugepage) {
memalloc_noreclaim_restore(noreclaim_flag);
if (ret)
sp_add_work_compact();
}
if (ret) {
__sp_free(spg, sp_addr, size_aligned,
(list_next_entry(spg_node, proc_node))->master->mm);
sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node);
if (unlikely(fatal_signal_pending(current)))
pr_warn_ratelimited("allocation failed, current thread is killed\n");
else
pr_warn_ratelimited("allocation failed due to mm populate failed"
"(potential no enough memory when -12): %d\n", ret);
sp_fallocate(spa); /* need this, otherwise memleak */
sp_alloc_fallback(spa, ac);
} else {
ac->need_fallocate = true;
}
return ret;
}
static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
struct sp_group_node *spg_node, struct sp_alloc_context *ac)
{
int ret;
ret = sp_alloc_mmap(mm, spa, spg_node, ac);
if (ret < 0) {
if (ac->need_fallocate) {
/* e.g. second sp_mmap fail */
sp_fallocate(spa);
if (file == spg->file_hugetlb) {
spg->hugepage_failures++;
/* fallback to small pages */
if (!(sp_flags & SP_HUGEPAGE_ONLY)) {
file = spg->file;
size_aligned = ALIGN(size, PAGE_SIZE);
sp_flags &= ~SP_HUGEPAGE;
__sp_area_drop(spa);
goto try_again;
ac->need_fallocate = false;
}
return ret;
}
break;
ret = sp_alloc_populate(mm, spa, spg_node, ac);
return ret;
}
static int sp_alloc_mmap_populate(struct sp_area *spa,
struct sp_alloc_context *ac)
{
int ret;
struct mm_struct *mm;
struct sp_group_node *spg_node;
/* create mapping for each process in the group */
list_for_each_entry(spg_node, &spa->spg->procs, proc_node) {
mm = spg_node->master->mm;
ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac);
if (ret)
return ret;
}
return ret;
}
/**
* sp_alloc() - Allocate shared memory for all the processes in a sp_group.
* @size: the size of memory to allocate.
* @sp_flags: how to allocate the memory.
* @spg_id: the share group that the memory is allocated to.
*
* Use pass through allocation if spg_id == SPG_ID_DEFAULT in multi-group mode.
*
* Return:
* * on success, return the starting kernel address of the shared memory.
* * on failure, return an ERR_PTR encoding -errno.
*/
void *sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
{
struct sp_group *spg;
struct sp_area *spa = NULL;
int ret = 0;
struct sp_alloc_context ac;
ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac);
if (ret)
return ERR_PTR(ret);
try_again:
spa = sp_alloc_area(ac.size_aligned, ac.sp_flags, ac.spg,
SPA_TYPE_ALLOC, current->tgid);
if (IS_ERR(spa)) {
pr_err_ratelimited("alloc spa failed in allocation"
"(potential no enough virtual memory when -75): %ld\n", PTR_ERR(spa));
ret = PTR_ERR(spa);
goto out;
}
ret = sp_alloc_mmap_populate(spa, &ac);
if (ret && ac.state == ALLOC_RETRY)
goto try_again;
out:
up_read(&spg->rw_lock);
@@ -3186,7 +3259,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
down_read(&spg->rw_lock);
if (spg_valid(spg)) {
spg_id = spg->id;
hugepage_failures = spg->hugepage_failures;
hugepage_failures = atomic_read(&spg->stat->hugepage_failures);
up_read(&spg->rw_lock);
/* eliminate potential ABBA deadlock */
......
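
For context, a hypothetical caller-side snippet illustrating the
contract documented in the sp_alloc() kernel-doc above (spg_id, SZ_2M
and the surrounding error handling are illustrative assumptions; only
sp_alloc() and SP_HUGEPAGE come from this interface):

    /* Illustrative only: allocate 2 MB shared by every process in the group. */
    void *buf = sp_alloc(SZ_2M, SP_HUGEPAGE, spg_id);
    if (IS_ERR(buf))
            return PTR_ERR(buf); /* e.g. -ENOMEM after the small-page fallback */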