提交 1496d67c 编写于 作者: L Lu Jialin 提交者: Zheng Zengkai

memcg: support memcg sync reclaim work as kswapd

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4IMAK?from=project-issue
CVE: NA

--------

Since memory.high reclaim is synchronous regardless of whether the charge was
made in interrupt context, it can do more work than direct reclaim, e.g. write
out dirty pages.

So set the PF_KSWAPD flag while doing memory.high reclaim, so that
current_is_kswapd() returns true for memcg kswapd.

Memcg kswapd should stop once the usage of the memcg drops below the memcg
kswapd stop flag. When userland has set memory.max for the memcg, the stop
flag is (memcg->memory.high - memcg->memory.max * 10 / 1000), which is similar
to global kswapd. Otherwise, the stop flag is (memcg->memory.high -
memcg->memory.high / 6), which is similar to the largest difference between
watermark_low and watermark_high.

And, memcg kswapd should not break memory.low protection for now.
Signed-off-by: Lu Jialin <lujialin4@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: weiyang wang <wangweiyang2@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
上级 6a7b3e98
......@@ -2364,8 +2364,10 @@ static void high_work_func(struct work_struct *work)
{
/*
 * Work handler: reclaim MEMCG_CHARGE_BATCH pages from the memcg that embeds
 * this work item, with the kswapd-style task flags set around the call.
 */
struct mem_cgroup *memcg;
/*
 * Mark the current task for the duration of the reclaim. PF_KSWAPD is what
 * makes current_is_kswapd() return true for this memcg reclaim.
 */
current->flags |= PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD;
/* Recover the owning memcg from its embedded high_work member. */
memcg = container_of(work, struct mem_cgroup, high_work);
reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL);
/*
 * NOTE(review): the flags are cleared unconditionally rather than restored,
 * so any of these bits that was already set on entry is lost here -- confirm
 * this handler is never entered with them set.
 */
current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD);
}
/*
......@@ -2535,9 +2537,11 @@ void mem_cgroup_handle_over_high(void)
* memory.high is currently batched, whereas memory.max and the page
* allocator run every time an allocation is made.
*/
current->flags |= PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD;
nr_reclaimed = reclaim_high(memcg,
in_retry ? SWAP_CLUSTER_MAX : nr_pages,
GFP_KERNEL);
current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD);
/*
* memory.high is breached and reclaim is unable to keep up. Throttle
......
......@@ -61,6 +61,8 @@
#include "internal.h"
/*
 * Margin, in thousandths of memory.max, used by is_memcg_kswapd_stopped()
 * when memory.max is set: the value memory.max * MEMCG_KSWAPD_SCATOR / 1000
 * is subtracted from memory.high to get the usage below which memcg
 * background reclaim stops.
 * NOTE(review): "SCATOR" is presumably short for "scale factor" -- confirm
 * before renaming, since users outside this hunk may exist.
 */
#define MEMCG_KSWAPD_SCATOR 10
#define CREATE_TRACE_POINTS
#include <trace/events/vmscan.h>
......@@ -2834,6 +2836,24 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
return inactive_lru_pages > pages_for_compaction;
}
/*
 * is_memcg_kswapd_stopped - should memcg background reclaim stop early?
 * @sc: scan control of the reclaim pass in progress
 *
 * Returns true only when this is cgroup reclaim (cgroup_reclaim(sc)), the
 * current task reports current_is_kswapd(), and the usage of
 * sc->target_mem_cgroup has dropped below the stop threshold:
 *
 *   memory.max unset (PAGE_COUNTER_MAX):
 *	memory.high - memory.high / 6
 *   memory.max set:
 *	memory.high - memory.max * MEMCG_KSWAPD_SCATOR / 1000
 *
 * Returns false in every other case, i.e. the caller keeps reclaiming.
 */
static bool is_memcg_kswapd_stopped(struct scan_control *sc)
{
	struct mem_cgroup *memcg = sc->target_mem_cgroup;
	unsigned long high, margin, stop_flag;

	/*
	 * Neither check has side effects, so bail out before touching the
	 * memcg; memcg is only dereferenced once cgroup_reclaim() passed,
	 * exactly as before.
	 */
	if (!cgroup_reclaim(sc) || !current_is_kswapd())
		return false;

	high = memcg->memory.high;

	/* Distance below memory.high at which background reclaim stops. */
	if (memcg->memory.max == PAGE_COUNTER_MAX)
		margin = high / 6;
	else
		margin = memcg->memory.max * MEMCG_KSWAPD_SCATOR / 1000;

	/*
	 * The margin can exceed memory.high (memory.max much larger than
	 * memory.high). The subtraction is unsigned, so it would wrap to a
	 * huge threshold and report "stopped" unconditionally. Clamp to 0
	 * instead: no early stop, reclaim is bounded by the caller's target.
	 */
	stop_flag = high > margin ? high - margin : 0;

	return page_counter_read(&memcg->memory) < stop_flag;
}
static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
{
struct mem_cgroup *target_memcg = sc->target_mem_cgroup;
......@@ -2889,6 +2909,14 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
sc->nr_scanned - scanned,
sc->nr_reclaimed - reclaimed);
/*
* Memcg background reclaim would break iter once memcg kswapd
* flag is satisfied.
*/
if (is_memcg_kswapd_stopped(sc)) {
mem_cgroup_iter_break(target_memcg, memcg);
break;
}
} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
}
......@@ -3257,6 +3285,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
__count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
do {
if (is_memcg_kswapd_stopped(sc))
break;
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
sc->priority);
sc->nr_scanned = 0;
......@@ -3319,8 +3350,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
goto retry;
}
/* Untapped cgroup reserves? Don't OOM, retry. */
if (sc->memcg_low_skipped) {
/*
 * Untapped cgroup reserves? Don't OOM, retry.
 * Exception: memcg kswapd stops reclaiming on its own once memcg usage
 * drops below its stop threshold (see is_memcg_kswapd_stopped()), so it
 * must not retry with memory.low protection disabled.
 */
if (sc->memcg_low_skipped &&
!(current_is_kswapd() && cgroup_reclaim(sc))) {
sc->priority = initial_priority;
sc->force_deactivate = 0;
sc->memcg_low_reclaim = 1;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册