Commit 875ffd41 authored by Ma Wupeng, committed by Yongqiang Liu

mm: Do limit checking after memory allocation for memory reliable

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA

--------------------------------

Previously, limit checking was done before the actual memory allocation. This
left some steps in __alloc_pages_slowpath() unreached (kswapd wakeup, direct
compaction and so on).

Now limit checking is done at the end of __alloc_pages_nodemask(). The
allocated pages are released if the allocation is rejected by limit checking.

Memory allocation will fall back to the movable zone if one of the following
conditions is met:
- memory reliable fallback is enabled
- the allocating task is the global init process

Memory allocation with __GFP_NOFAIL will not check any limit.

If memory reliable fallback is disabled, the gfp mask does not contain any of
the following flags:
- __GFP_NORETRY
- __GFP_RETRY_MAYFAIL
- __GFP_THISNODE
and neither of the following conditions is true:
- current->flags & PF_DUMPCORE
- order > PAGE_ALLOC_COSTLY_ORDER
then OOM will be triggered to release some memory.
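
The overall flow after this patch can be sketched as follows. This is a
minimal illustration of the intent, not the literal kernel change:
alloc_with_reliable_limit() and do_real_allocation() are hypothetical
placeholders standing in for __alloc_pages_nodemask() and its
fastpath/slowpath, while prepare_before_alloc() and check_after_alloc()
are the helpers touched by this patch.

/*
 * Minimal sketch, assuming the hypothetical do_real_allocation() performs
 * the usual fastpath/slowpath allocation and returns a page or NULL.
 */
static struct page *alloc_with_reliable_limit(gfp_t gfp_mask, unsigned int order,
					      int preferred_nid, nodemask_t *nodemask)
{
	struct page *page;

	/* may tag the request with ___GFP_RELIABILITY; no limit check here */
	prepare_before_alloc(&gfp_mask);

retry:
	page = do_real_allocation(gfp_mask, order, preferred_nid, nodemask);

	/*
	 * Limit checking happens only after the allocation. If the limit is
	 * exceeded, the freshly allocated pages are freed; the request is
	 * retried without ___GFP_RELIABILITY when fallback is allowed (or
	 * for the global init task), otherwise mem_reliable_out_of_memory()
	 * may be invoked.
	 */
	if (check_after_alloc(&gfp_mask, order, preferred_nid, nodemask, &page))
		goto retry;

	return page;
}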
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
Parent 2b491a2e
@@ -3671,60 +3671,6 @@ __alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order,
return page;
}
#ifdef CONFIG_MEMORY_RELIABLE
static inline void reliable_fb_find_zone(gfp_t gfp_mask,
struct alloc_context *ac)
{
if (!reliable_allow_fb_enabled())
return;
/* dst node doesn't have the zone we want, fall back here */
if ((gfp_mask & __GFP_THISNODE) && (ac->high_zoneidx == ZONE_NORMAL) &&
(gfp_mask & ___GFP_RELIABILITY)) {
ac->high_zoneidx = gfp_zone(gfp_mask & ~___GFP_RELIABILITY);
ac->preferred_zoneref = first_zones_zonelist(
ac->zonelist, ac->high_zoneidx, ac->nodemask);
}
return;
}
static inline struct page *
reliable_fb_before_oom(gfp_t gfp_mask, int order,
const struct alloc_context *ac)
{
if (!reliable_allow_fb_enabled())
return NULL;
/* key user process alloc mem from movable zone to avoid oom */
if ((ac->high_zoneidx == ZONE_NORMAL) &&
(gfp_mask & ___GFP_RELIABILITY)) {
struct alloc_context tmp_ac = *ac;
tmp_ac.high_zoneidx = ZONE_MOVABLE;
tmp_ac.preferred_zoneref = first_zones_zonelist(
ac->zonelist, ZONE_MOVABLE, ac->nodemask);
return get_page_from_freelist(
(gfp_mask | __GFP_HARDWALL) & ~__GFP_DIRECT_RECLAIM,
order, ALLOC_WMARK_HIGH | ALLOC_CPUSET, &tmp_ac);
}
return NULL;
}
#else
static inline void reliable_fb_find_zone(gfp_t gfp_mask,
struct alloc_context *ac)
{
return;
}
static inline struct page *reliable_fb_before_oom(gfp_t gfp_mask, int order,
const struct alloc_context *ac)
{
return NULL;
}
#endif
static inline struct page *
__alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
const struct alloc_context *ac, unsigned long *did_some_progress)
@@ -3763,10 +3709,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
if (page)
goto out;
page = reliable_fb_before_oom(gfp_mask, order, ac);
if (page)
goto out;
/* Coredumps can quickly deplete all memory reserves */
if (current->flags & PF_DUMPCORE)
goto out;
@@ -4374,12 +4316,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
*/
ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
ac->high_zoneidx, ac->nodemask);
if (!ac->preferred_zoneref->zone) {
reliable_fb_find_zone(gfp_mask, ac);
if (!ac->preferred_zoneref->zone)
goto nopage;
}
if (!ac->preferred_zoneref->zone)
goto nopage;
if (gfp_mask & __GFP_KSWAPD_RECLAIM)
wake_all_kswapds(order, gfp_mask, ac);
@@ -4638,74 +4576,94 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
ac->high_zoneidx, ac->nodemask);
}
/*
* return false means this allocation is limited by the reliable user limit and
* this will lead to pagefault_out_of_memory()
*/
static inline bool prepare_before_alloc(gfp_t *gfp_mask, unsigned int order)
static inline void prepare_before_alloc(gfp_t *gfp_mask)
{
gfp_t gfp_ori = *gfp_mask;
bool zone_movable;
*gfp_mask &= gfp_allowed_mask;
if (!mem_reliable_is_enabled())
return true;
return;
if (*gfp_mask & __GFP_NOFAIL)
return true;
/*
* memory reliable only handles memory allocation from the movable zone
* (force alloc from non-movable zone or force alloc from movable
* zone) to get total isolation.
*/
zone_movable = gfp_zone(*gfp_mask) == ZONE_MOVABLE;
if (!zone_movable)
return;
if (gfp_ori & ___GFP_RELIABILITY) {
if (!(gfp_ori & __GFP_HIGHMEM) || !(gfp_ori & __GFP_MOVABLE))
return true;
*gfp_mask |= ___GFP_RELIABILITY;
return;
}
if (mem_reliable_watermark_ok(1 << order)) {
*gfp_mask |= ___GFP_RELIABILITY;
return true;
}
if (is_global_init(current) || (current->flags & PF_RELIABLE))
*gfp_mask |= ___GFP_RELIABILITY;
}
if (reliable_allow_fb_enabled())
return true;
/*
* return true means the memory allocation needs a retry and flag ___GFP_RELIABILITY
* must be cleared.
*/
static inline bool check_after_alloc(gfp_t *gfp_mask, unsigned int order,
int preferred_nid, nodemask_t *nodemask,
struct page **_page)
{
if (!mem_reliable_is_enabled())
return false;
if (!(*gfp_mask & ___GFP_RELIABILITY))
return false;
}
/*
* Init tasks will alloc memory from non-mirrored region if their
* allocation triggers task_reliable_limit
*/
if (is_global_init(current)) {
if (!mem_reliable_counter_initialized()) {
*gfp_mask |= ___GFP_RELIABILITY;
return true;
}
if (!*_page)
goto out_retry;
if (reliable_mem_limit_check(1 << order) &&
mem_reliable_watermark_ok(1 << order))
*gfp_mask |= ___GFP_RELIABILITY;
return true;
}
if (*gfp_mask & __GFP_NOFAIL)
goto out;
/*
* This only checks task_reliable_limit for allocations without
* ___GFP_RELIABILITY or when this process is the global init.
* Kernel internal mechanisms (hugepaged collapse and others) that alloc
* memory for user space and obey task_reliable_limit need to check this
* limit before allocating pages.
*/
if ((current->flags & PF_RELIABLE) && (gfp_ori & __GFP_HIGHMEM) &&
(gfp_ori & __GFP_MOVABLE)) {
if (reliable_mem_limit_check(1 << order) &&
mem_reliable_watermark_ok(1 << order)) {
*gfp_mask |= ___GFP_RELIABILITY;
return true;
}
/* check watermark, reserve mirrored mem for kernel */
if (!mem_reliable_watermark_ok(1 << order))
goto out_free_page;
if (reliable_allow_fb_enabled())
return true;
/* percpu counter is not initialized, ignore limit check */
if (!mem_reliable_counter_initialized())
goto out;
return false;
/* special user task, systemd is limited by task_reliable_limit */
if (((current->flags & PF_RELIABLE) || is_global_init(current)) &&
!reliable_mem_limit_check(1 << order))
goto out_free_page;
goto out;
out_free_page:
__free_pages(*_page, order);
*_page = NULL;
out_retry:
if (reliable_allow_fb_enabled() || is_global_init(current)) {
*gfp_mask &= ~___GFP_RELIABILITY;
return true;
}
return true;
if (*gfp_mask & (__GFP_NORETRY | __GFP_RETRY_MAYFAIL | __GFP_THISNODE))
goto out;
/* Coredumps can quickly deplete all memory reserves */
if (current->flags & PF_DUMPCORE)
goto out;
/* The OOM killer will not help higher order allocs */
if (order > PAGE_ALLOC_COSTLY_ORDER)
goto out;
/* oom here */
mem_reliable_out_of_memory(*gfp_mask, order, preferred_nid,
nodemask);
out:
return false;
}
/*
@@ -4729,12 +4687,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
return NULL;
}
if (!prepare_before_alloc(&gfp_mask, order)) {
mem_reliable_out_of_memory(gfp_mask, order, preferred_nid,
nodemask);
goto out;
}
prepare_before_alloc(&gfp_mask);
retry:
alloc_mask = gfp_mask;
if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
return NULL;
@@ -4771,6 +4726,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
page = NULL;
}
if (check_after_alloc(&gfp_mask, order, preferred_nid, nodemask, &page))
goto retry;
trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype);
return page;