From 6332d4e3580bd64a621edc179c3c6abb461192f5 Mon Sep 17 00:00:00 2001
From: Yang Shi
Date: Fri, 2 Aug 2019 02:01:40 +0800
Subject: [PATCH] alinux: mm: memcontrol: treat memcg wmark reclaim work as
 kswapd

Since background watermark reclaim is scheduled from a workqueue, it
may do more work than direct reclaim, e.g. write out dirty pages. So
set the PF_KSWAPD flag so that current_is_kswapd() returns true for
memcg background reclaim.

The condition "current_is_kswapd() && !global_reclaim(sc)" is
sufficient to tell whether current is global kswapd or memcg
background reclaim.

And since kswapd is not allowed to break memory.low protection for
now, memcg kswapd should not break it either.

Reviewed-by: Gavin Shan
Reviewed-by: Xunlei Pang
Signed-off-by: Yang Shi
---
 mm/memcontrol.c |  4 ++--
 mm/vmscan.c     | 20 ++++++++++++++++----
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2fd4751f5804..6aaaaa3ae090 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2181,9 +2181,9 @@ static void wmark_work_func(struct work_struct *work)
 
 	memcg = container_of(work, struct mem_cgroup, wmark_work);
 
-	current->flags |= PF_SWAPWRITE | PF_MEMALLOC;
+	current->flags |= PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD;
 	reclaim_wmark(memcg);
-	current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC);
+	current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD);
 }
 
 static void reclaim_high(struct mem_cgroup *memcg,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c58c0328903e..d0a203cddb09 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2755,9 +2755,13 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 			 * nr_to_reclaim pages to be reclaimed and it will
 			 * retry with decreasing priority if one round over the
 			 * whole hierarchy is not sufficient.
+			 *
+			 * Memcg background reclaim would break iter once water
+			 * mark is satisfied.
 			 */
 			if (!global_reclaim(sc) &&
-					sc->nr_reclaimed >= sc->nr_to_reclaim) {
+			    ((sc->nr_reclaimed >= sc->nr_to_reclaim) ||
+			     (current_is_kswapd() && is_wmark_ok(root, false)))) {
 				mem_cgroup_iter_break(root, memcg);
 				break;
 			}
@@ -2776,7 +2780,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 		if (sc->nr_reclaimed - nr_reclaimed)
 			reclaimable = true;
 
-		if (current_is_kswapd()) {
+		if (current_is_kswapd() && global_reclaim(sc)) {
 			/*
 			 * If reclaim is isolating dirty pages under writeback,
 			 * it implies that the long-lived page allocation rate
@@ -3022,6 +3026,10 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		__count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
 
 	do {
+		if (current_is_kswapd() && !global_reclaim(sc) &&
+		    is_wmark_ok(sc->target_mem_cgroup, false))
+			break;
+
 		vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
 				sc->priority);
 		sc->nr_scanned = 0;
@@ -3060,8 +3068,12 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	if (sc->compaction_ready)
 		return 1;
 
-	/* Untapped cgroup reserves? Don't OOM, retry. */
-	if (sc->memcg_low_skipped) {
+	/*
+	 * Untapped cgroup reserves? Don't OOM, retry.
+	 *
+	 * Memcg kswapd should not break low protection.
+	 */
+	if (sc->memcg_low_skipped && !current_is_kswapd()) {
 		sc->priority = initial_priority;
 		sc->memcg_low_reclaim = 1;
 		sc->memcg_low_skipped = 0;
-- 
GitLab
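
Note: the distinction the patch draws between global kswapd and memcg
background reclaim rests on two small helpers. A minimal sketch of
their definitions as they appear in v4.19-era upstream sources (the
alinux-specific is_wmark_ok() and reclaim_wmark() come from the rest
of this series and are not shown):

/* include/linux/sched.h: per-task flag identifying kswapd */
#define PF_KSWAPD		0x00020000	/* I am kswapd */

/*
 * include/linux/swap.h: true for global kswapd and, after this patch,
 * for the memcg wmark worker while reclaim_wmark() is running.
 */
static inline int current_is_kswapd(void)
{
	return current->flags & PF_KSWAPD;
}

/*
 * mm/vmscan.c: false whenever reclaim targets a specific memcg, which
 * is what lets "current_is_kswapd() && !global_reclaim(sc)" single
 * out memcg background reclaim.
 */
#ifdef CONFIG_MEMCG
static bool global_reclaim(struct scan_control *sc)
{
	return !sc->target_mem_cgroup;
}
#endif

With PF_KSWAPD set around reclaim_wmark(), the wmark worker takes the
kswapd paths in vmscan.c (e.g. it may write back dirty pages), while
global_reclaim(sc) stays false because sc->target_mem_cgroup is set;
that is why the writeback-stall accounting in shrink_node() is now
gated on "current_is_kswapd() && global_reclaim(sc)".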