Commit 0122fd48 authored by Ma Wupeng, committed by Zheng Zengkai

mm: Add reliable memory use limit for user tasks

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA

--------------------------------

There is an upper limit on memory allocation if the following conditions
are met:
- gfp_zone(gfp & ~GFP_RELIABLE) == ZONE_MOVABLE
- gfp & GFP_RELIABLE is true

Init tasks will allocate memory from the non-mirrored region if their
allocations hit this limit.

The limit can be set or accessed via /proc/sys/vm/task_reliable_limit.

This limit's default value is ULONG_MAX. Users can update this value to
anything between the amount of reliable memory currently in use and the
total reliable memory size.
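
For illustration, a minimal userspace sketch of driving this sysctl is shown
below. The /proc path and the range check come from this patch; the chosen
value (4 GiB) and the surrounding file I/O and error handling are assumptions
made only for the example.

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define LIMIT_PATH "/proc/sys/vm/task_reliable_limit"

int main(void)
{
	char buf[64];
	FILE *fp;

	/* Read the current limit in bytes; the default is ULONG_MAX. */
	fp = fopen(LIMIT_PATH, "r");
	if (!fp || !fgets(buf, sizeof(buf), fp)) {
		perror("read " LIMIT_PATH);
		return 1;
	}
	fclose(fp);
	printf("current task_reliable_limit: %s", buf);

	/*
	 * Try to lower the limit to 4 GiB (illustrative value).  The kernel
	 * rejects the write with EINVAL if the new value is below the
	 * reliable memory already in use or above the total reliable
	 * memory size.
	 */
	fp = fopen(LIMIT_PATH, "w");
	if (!fp) {
		perror("open " LIMIT_PATH " for writing");
		return 1;
	}
	if (fprintf(fp, "%llu\n", 4ULL << 30) < 0 || fclose(fp) == EOF) {
		fprintf(stderr, "write rejected: %s\n", strerror(errno));
		return 1;
	}

	return 0;
}

Writing the limit requires root. The value is interpreted in bytes: the
allocator compares used reliable pages against task_reliable_limit >>
PAGE_SHIFT.
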
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Parent 545f22f9
......@@ -5,8 +5,10 @@
#include <linux/stddef.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>
#include <linux/oom.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/percpu_counter.h>
#ifdef CONFIG_MEMORY_RELIABLE
......@@ -17,6 +19,7 @@ extern bool shmem_reliable;
extern bool pagecache_use_reliable_mem;
extern struct percpu_counter pagecache_reliable_pages;
extern struct percpu_counter anon_reliable_pages;
extern unsigned long task_reliable_limit __read_mostly;
extern void mem_reliable_init(bool has_unmirrored_mem,
unsigned long *zone_movable_pfn,
......@@ -30,6 +33,8 @@ extern void reliable_lru_add(enum lru_list lru, struct page *page,
extern void reliable_lru_add_batch(int zid, enum lru_list lru,
int val);
extern bool mem_reliable_counter_initialized(void);
extern void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order,
int preferred_nid, nodemask_t *nodemask);
static inline bool mem_reliable_is_enabled(void)
{
......@@ -74,6 +79,31 @@ static inline bool page_reliable(struct page *page)
	return page_zonenum(page) < ZONE_MOVABLE;
}

static inline u64 task_reliable_used_pages(void)
{
	s64 nr_pages;

	nr_pages = percpu_counter_read_positive(&pagecache_reliable_pages);
	nr_pages += percpu_counter_read_positive(&anon_reliable_pages);

	return nr_pages;
}

static inline bool reliable_mem_limit_check(unsigned long nr_page)
{
	return (task_reliable_used_pages() + nr_page) <=
	       (task_reliable_limit >> PAGE_SHIFT);
}

static inline bool mem_reliable_should_reclaim(void)
{
	if (percpu_counter_sum_positive(&pagecache_reliable_pages) >=
	    MAX_ORDER_NR_PAGES)
		return true;

	return false;
}
#else
#define reliable_enabled 0
#define pagecache_use_reliable_mem 0
......@@ -98,6 +128,16 @@ static inline void reliable_lru_add(enum lru_list lru, struct page *page,
static inline void reliable_lru_add_batch(int zid, enum lru_list lru,
int val) {}
static inline bool mem_reliable_counter_initialized(void) { return false; }
static inline u64 task_reliable_used_pages(void) { return 0; }
static inline bool reliable_mem_limit_check(unsigned long nr_page)
{
	return false;
}
static inline bool mem_reliable_should_reclaim(void) { return false; }
static inline void mem_reliable_out_of_memory(gfp_t gfp_mask,
					      unsigned int order,
					      int preferred_nid,
					      nodemask_t *nodemask) {}
#endif
#endif
......@@ -20,6 +20,8 @@ bool pagecache_use_reliable_mem __read_mostly = true;
struct percpu_counter pagecache_reliable_pages;
struct percpu_counter anon_reliable_pages;
static unsigned long reliable_pagecache_max_bytes = ULONG_MAX;
/* reliable user limit for user tasks with reliable flag */
unsigned long task_reliable_limit = ULONG_MAX;
bool mem_reliable_counter_initialized(void)
{
......@@ -178,6 +180,25 @@ void reliable_report_meminfo(struct seq_file *m)
}
}
int reliable_limit_handler(struct ctl_table *table, int write,
			   void __user *buffer, size_t *length, loff_t *ppos)
{
	unsigned long old = task_reliable_limit;
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
	if (ret == 0 && write) {
		if (task_reliable_limit > PAGES_TO_B(total_reliable_pages()) ||
		    task_reliable_limit <
		    (task_reliable_used_pages() << PAGE_SHIFT)) {
			task_reliable_limit = old;
			return -EINVAL;
		}
	}

	return ret;
}
int reliable_pagecache_max_bytes_write(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
......@@ -204,6 +225,13 @@ static struct ctl_table reliable_ctl_table[] = {
.mode = 0644,
.proc_handler = reliable_pagecache_max_bytes_write,
},
{
.procname = "task_reliable_limit",
.data = &task_reliable_limit,
.maxlen = sizeof(task_reliable_limit),
.mode = 0644,
.proc_handler = reliable_limit_handler,
},
{}
};
......@@ -234,6 +262,23 @@ static int __init reliable_sysctl_init(void)
}
arch_initcall(reliable_sysctl_init);
void mem_reliable_out_of_memory(gfp_t gfp, unsigned int order,
				int preferred_nid, nodemask_t *nodemask)
{
	struct oom_control oc = {
		.zonelist = node_zonelist(preferred_nid, gfp),
		.nodemask = nodemask,
		.memcg = NULL,
		.gfp_mask = gfp,
		.order = order,
	};

	if (!mutex_trylock(&oom_lock))
		return;

	out_of_memory(&oc);
	mutex_unlock(&oom_lock);
}
static int __init setup_reliable_debug(char *str)
{
if (*str++ != '=' || !*str)
......
......@@ -5166,6 +5166,89 @@ static inline void prepare_before_alloc(gfp_t *gfp_mask)
*gfp_mask &= ~GFP_RELIABLE;
}
static inline long mem_reliable_direct_reclaim(int nr_pages, struct alloc_context *ac)
{
	long nr_reclaimed = 0;

	while (nr_reclaimed < nr_pages) {
		/* try to free cache from reliable region */
		long progress = __perform_reclaim(GFP_KERNEL, 0, ac);

		nr_reclaimed += progress;
		if (progress < SWAP_CLUSTER_MAX)
			break;
	}

	return nr_reclaimed;
}
/*
 * Returning true means the memory allocation needs to be retried and the
 * GFP_RELIABLE flag must be cleared.
 */
static inline bool check_after_alloc(gfp_t *gfp, unsigned int order,
				     int preferred_nid,
				     struct alloc_context *ac,
				     struct page **_page)
{
	int retry_times = MAX_RECLAIM_RETRIES;
	int nr_pages;

	if (!mem_reliable_is_enabled())
		return false;

	if (!(*gfp & GFP_RELIABLE))
		return false;

	if (!*_page)
		goto out_retry;

	if (*gfp & __GFP_NOFAIL || current->flags & PF_MEMALLOC)
		goto out;

	/* percpu counter is not initialized, ignore limit check */
	if (!mem_reliable_counter_initialized())
		goto out;

limit_check:
	/* user task is limited by task_reliable_limit */
	if (!reliable_mem_limit_check(1 << order))
		goto out_free_page;

	goto out;

out_free_page:
	if (mem_reliable_should_reclaim() && retry_times--) {
		nr_pages = mem_reliable_direct_reclaim(1 << order, ac);
		if (nr_pages)
			goto limit_check;
	}

	__free_pages(*_page, order);
	*_page = NULL;

out_retry:
	if (is_global_init(current)) {
		*gfp &= ~GFP_RELIABLE;
		return true;
	}

	if (*gfp & (__GFP_NORETRY | __GFP_RETRY_MAYFAIL | __GFP_THISNODE))
		goto out;

	/* Coredumps can quickly deplete all memory reserves */
	if (current->flags & PF_DUMPCORE)
		goto out;

	/* The OOM killer will not help higher order allocs */
	if (order > PAGE_ALLOC_COSTLY_ORDER)
		goto out;

	/* oom here */
	mem_reliable_out_of_memory(*gfp, order, preferred_nid, ac->nodemask);

out:
	return false;
}
/*
* This is the 'heart' of the zoned buddy allocator.
*/
......@@ -5190,6 +5273,7 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
	prepare_before_alloc(&gfp);

retry:
	alloc_gfp = gfp;
	if (!prepare_alloc_pages(gfp, order, preferred_nid, nodemask, &ac,
				 &alloc_gfp, &alloc_flags))
......@@ -5235,6 +5319,9 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
		page = NULL;
	}

	if (check_after_alloc(&gfp, order, preferred_nid, &ac, &page))
		goto retry;

	trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);

	return page;
......