Commit 0956a655 authored by Yang Shi, committed by Joseph Qi

alinux: mm: memcontrol: treat memcg wmark reclaim work as kswapd

Since background water mark reclaim is scheduled by a workqueue, it can
do more work than direct reclaim, e.g. write out dirty pages.

So add the PF_KSWAPD flag so that current_is_kswapd() returns true for
memcg background reclaim.  The condition "current_is_kswapd() &&
!global_reclaim(sc)" is enough to tell whether current is global kswapd
or memcg background reclaim.
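
For reference, both predicates used above are simple checks; a
paraphrased sketch of how they are defined in this tree (mm/internal.h,
and mm/vmscan.c under CONFIG_MEMCG):

	/* current_is_kswapd(): true whenever the task carries PF_KSWAPD,
	 * which after this patch includes the memcg wmark worker. */
	static inline int current_is_kswapd(void)
	{
		return current->flags & PF_KSWAPD;
	}

	/* global_reclaim(): true when reclaim has no target memcg. */
	static bool global_reclaim(struct scan_control *sc)
	{
		return !sc->target_mem_cgroup;
	}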

And since kswapd is not allowed to break memory.low protection for now,
memcg kswapd should not break it either.
Reviewed-by: Gavin Shan <shan.gavin@linux.alibaba.com>
Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Parent 2053fb86
@@ -2181,9 +2181,9 @@ static void wmark_work_func(struct work_struct *work)
 
 	memcg = container_of(work, struct mem_cgroup, wmark_work);
 
-	current->flags |= PF_SWAPWRITE | PF_MEMALLOC;
+	current->flags |= PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD;
 	reclaim_wmark(memcg);
-	current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC);
+	current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD);
 }
 
 static void reclaim_high(struct mem_cgroup *memcg,
@@ -2755,9 +2755,13 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
			 * nr_to_reclaim pages to be reclaimed and it will
			 * retry with decreasing priority if one round over the
			 * whole hierarchy is not sufficient.
+			 *
+			 * Memcg background reclaim would break iter once water
+			 * mark is satisfied.
			 */
			if (!global_reclaim(sc) &&
-					sc->nr_reclaimed >= sc->nr_to_reclaim) {
+				((sc->nr_reclaimed >= sc->nr_to_reclaim) ||
+				(current_is_kswapd() && is_wmark_ok(root, false)))) {
				mem_cgroup_iter_break(root, memcg);
				break;
			}
@@ -2776,7 +2780,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
	if (sc->nr_reclaimed - nr_reclaimed)
		reclaimable = true;
 
-	if (current_is_kswapd()) {
+	if (current_is_kswapd() && global_reclaim(sc)) {
		/*
		 * If reclaim is isolating dirty pages under writeback,
		 * it implies that the long-lived page allocation rate
@@ -3022,6 +3026,10 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
	__count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
 
	do {
+		if (current_is_kswapd() && !global_reclaim(sc) &&
+			is_wmark_ok(sc->target_mem_cgroup, false))
+			break;
+
		vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
				sc->priority);
		sc->nr_scanned = 0;
@@ -3060,8 +3068,12 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
	if (sc->compaction_ready)
		return 1;
 
-	/* Untapped cgroup reserves? Don't OOM, retry. */
-	if (sc->memcg_low_skipped) {
+	/*
+	 * Untapped cgroup reserves? Don't OOM, retry.
+	 *
+	 * Memcg kswapd should not break low protection.
+	 */
+	if (sc->memcg_low_skipped && !current_is_kswapd()) {
		sc->priority = initial_priority;
		sc->memcg_low_reclaim = 1;
		sc->memcg_low_skipped = 0;
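
Taken together, the patch lets the reclaim paths tell three contexts
apart. An illustrative summary, not part of the patch:

	if (current_is_kswapd() && global_reclaim(sc)) {
		/* global kswapd: may throttle on dirty/writeback pages */
	} else if (current_is_kswapd() && !global_reclaim(sc)) {
		/*
		 * memcg background (wmark) reclaim: bails out once
		 * is_wmark_ok() holds, and never retries past memory.low
		 * protection.
		 */
	} else {
		/* direct reclaim, global or memcg-targeted */
	}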