Commit 1845e7ad authored by Peng Wu, committed by Yang Yingliang

mm: Add reliable memory use limit for user tasks

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA

----------------------------------------------

Add an upper limit for the memory allocation of special user tasks.
A special user task is a user task with the reliable flag set.

Init tasks will allocate memory from the non-mirrored region if their
allocation would exceed the limit.

The limit can be set or read via /proc/sys/vm/task_reliable_limit.

The default value of this limit is ULONG_MAX.
Signed-off-by: Peng Wu <wupeng58@huawei.com>
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Parent ff0fb9e8
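The value stored in task_reliable_limit is interpreted in bytes: the diff
below compares task_reliable_limit / PAGE_SIZE against the reliable page
counter. Below is a minimal userspace sketch (not part of this patch) of how
the sysctl might be read and adjusted, with a 1 GiB target chosen purely for
illustration and root privileges assumed for the write:

```c
#include <stdio.h>
#include <stdlib.h>

#define RELIABLE_LIMIT_PATH "/proc/sys/vm/task_reliable_limit"

int main(void)
{
	unsigned long limit;
	FILE *fp;

	/* Read the current limit; the patch defaults it to ULONG_MAX. */
	fp = fopen(RELIABLE_LIMIT_PATH, "r");
	if (!fp) {
		perror("open " RELIABLE_LIMIT_PATH);
		return EXIT_FAILURE;
	}
	if (fscanf(fp, "%lu", &limit) == 1)
		printf("task_reliable_limit = %lu bytes\n", limit);
	fclose(fp);

	/*
	 * Lower the limit to 1 GiB (illustrative value). The handler in
	 * this patch rejects values larger than the total reliable
	 * memory size.
	 */
	fp = fopen(RELIABLE_LIMIT_PATH, "w");
	if (!fp) {
		perror("open " RELIABLE_LIMIT_PATH " for writing");
		return EXIT_FAILURE;
	}
	fprintf(fp, "%lu\n", 1UL << 30);
	fclose(fp);

	return EXIT_SUCCESS;
}
```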
......@@ -15,6 +15,7 @@ extern struct static_key_false mem_reliable;
extern bool reliable_enabled;
extern atomic_long_t reliable_user_used_nr_page;
extern unsigned long task_reliable_limit __read_mostly;
extern void add_reliable_mem_size(long sz);
extern void mem_reliable_init(bool has_unmirrored_mem,
......@@ -22,6 +23,9 @@ extern void mem_reliable_init(bool has_unmirrored_mem,
extern void reliable_report_meminfo(struct seq_file *m);
extern bool page_reliable(struct page *page);
extern void reliable_report_usage(struct seq_file *m, struct mm_struct *mm);
extern void reliable_show_mem_info(void);
extern void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order,
int preferred_nid, nodemask_t *nodemask);
static inline bool mem_reliable_is_enabled(void)
{
......@@ -59,6 +63,12 @@ static inline void reliable_page_counter(struct page *page,
atomic_long_add(val, &reliable_user_used_nr_page);
}
}
static inline bool reliable_mem_limit_check(unsigned long nr_page)
{
return atomic_long_read(&reliable_user_used_nr_page) + nr_page <=
task_reliable_limit / PAGE_SIZE;
}
#else
#define reliable_enabled 0
......@@ -78,6 +88,16 @@ static inline void reliable_page_counter(struct page *page,
static inline void reliable_report_usage(struct seq_file *m,
struct mm_struct *mm) {}
static inline bool reliable_mem_limit_check(unsigned long nr_page)
{
return false;
}
static inline void reliable_show_mem_info(void) {}
static inline void mem_reliable_out_of_memory(gfp_t gfp_mask,
unsigned int order,
int preferred_nid,
nodemask_t *nodemask) {}
#endif
#endif
......@@ -49,4 +49,5 @@ void show_mem(unsigned int filter, nodemask_t *nodemask)
#ifdef CONFIG_MEMORY_FAILURE
printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
#endif
reliable_show_mem_info();
}
......@@ -1235,6 +1235,12 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
out_unmap:
pte_unmap_unlock(pte, ptl);
if (ret) {
if (reliable &&
!reliable_mem_limit_check(1 << HPAGE_PMD_ORDER)) {
ret = SCAN_ALLOC_HUGE_PAGE_FAIL;
goto out;
}
node = khugepaged_find_target_node();
/* collapse_huge_page will return with the mmap_sem released */
collapse_huge_page(mm, address, hpage, node,
......@@ -1692,6 +1698,12 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
result = SCAN_EXCEED_NONE_PTE;
} else {
if (reliable &&
!reliable_mem_limit_check(1 << HPAGE_PMD_ORDER)) {
result = SCAN_ALLOC_HUGE_PAGE_FAIL;
goto out;
}
node = khugepaged_find_target_node();
collapse_shmem(mm, mapping, start, hpage, node,
reliable);
......@@ -1699,6 +1711,8 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
}
/* TODO: tracepoints */
out:
return;
}
#else
static void khugepaged_scan_shmem(struct mm_struct *mm,
......
......@@ -8,12 +8,15 @@
#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <linux/mmzone.h>
#include <linux/oom.h>
DEFINE_STATIC_KEY_FALSE(mem_reliable);
bool reliable_enabled;
static atomic_long_t total_reliable_mem;
atomic_long_t reliable_user_used_nr_page;
/* limit on reliable memory used by user tasks with the reliable flag */
unsigned long task_reliable_limit = ULONG_MAX;
void add_reliable_mem_size(long sz)
{
......@@ -118,3 +121,86 @@ void reliable_report_usage(struct seq_file *m, struct mm_struct *mm)
atomic_long_read(&mm->reliable_nr_page));
}
}
#ifdef CONFIG_SYSCTL
int reliable_limit_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
unsigned long old = task_reliable_limit;
int ret;
ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
if (ret == 0 && write) {
if (task_reliable_limit > total_reliable_mem_sz()) {
task_reliable_limit = old;
return -EINVAL;
}
}
return ret;
}
static struct ctl_table reliable_ctl_table[] = {
{
.procname = "task_reliable_limit",
.data = &task_reliable_limit,
.maxlen = sizeof(task_reliable_limit),
.mode = 0644,
.proc_handler = reliable_limit_handler,
},
{}
};
static struct ctl_table reliable_dir_table[] = {
{
.procname = "vm",
.maxlen = 0,
.mode = 0555,
.child = reliable_ctl_table,
},
{}
};
static int __init reliable_sysctl_init(void)
{
if (!mem_reliable_is_enabled())
return 0;
if (!register_sysctl_table(reliable_dir_table)) {
pr_err("register sysctl failed.");
return -1;
}
return 0;
}
late_initcall(reliable_sysctl_init);
#endif
void reliable_show_mem_info(void)
{
if (mem_reliable_is_enabled()) {
pr_info("ReliableTotal: %lu kB", total_reliable_mem_sz() >> 10);
pr_info("ReliableUsed: %lu kB", used_reliable_mem_sz() >> 10);
pr_info("task_reliable_limit: %lu kB",
task_reliable_limit >> 10);
pr_info("reliable_user_used: %ld kB",
atomic_long_read(&reliable_user_used_nr_page) * 4);
}
}
void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order,
int preferred_nid, nodemask_t *nodemask)
{
struct oom_control oc = {
.zonelist = node_zonelist(preferred_nid, gfp_mask),
.nodemask = nodemask,
.memcg = NULL,
.gfp_mask = gfp_mask,
.order = order,
};
if (!mutex_trylock(&oom_lock))
return;
out_of_memory(&oc);
mutex_unlock(&oom_lock);
}
......@@ -4561,19 +4561,51 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
ac->high_zoneidx, ac->nodemask);
}
static inline void prepare_before_alloc(gfp_t *gfp_mask)
/*
 * Returning false means this allocation is restricted by the reliable
 * user limit and will lead to pagefault_out_of_memory().
 */
static inline bool prepare_before_alloc(gfp_t *gfp_mask, unsigned int order)
{
gfp_t gfp_ori = *gfp_mask;
*gfp_mask &= gfp_allowed_mask;
if (!mem_reliable_is_enabled())
return;
return true;
if (gfp_ori & ___GFP_RELIABILITY)
if (gfp_ori & ___GFP_RELIABILITY) {
*gfp_mask |= ___GFP_RELIABILITY;
return true;
}
if (current->flags & PF_RELIABLE || is_global_init(current))
*gfp_mask |= ___GFP_RELIABILITY;
/*
 * Init tasks will allocate memory from the non-mirrored region if
 * their allocation would exceed task_reliable_limit.
 */
if (is_global_init(current)) {
if (reliable_mem_limit_check(1 << order))
*gfp_mask |= ___GFP_RELIABILITY;
return true;
}
/*
 * Only allocations without ___GFP_RELIABILITY from tasks other than
 * global init reach this check of task_reliable_limit.
 * Kernel-internal mechanisms (khugepaged collapse and others) that
 * allocate memory on behalf of user space and obey task_reliable_limit
 * need to check this limit before allocating pages.
 */
if ((current->flags & PF_RELIABLE) && (gfp_ori & __GFP_HIGHMEM) &&
(gfp_ori & __GFP_MOVABLE)) {
if (reliable_mem_limit_check(1 << order)) {
*gfp_mask |= ___GFP_RELIABILITY;
return true;
}
return false;
}
return true;
}
/*
......@@ -4583,7 +4615,7 @@ struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
nodemask_t *nodemask)
{
struct page *page;
struct page *page = NULL;
unsigned int alloc_flags = ALLOC_WMARK_LOW;
gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
struct alloc_context ac = { };
......@@ -4597,7 +4629,11 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
return NULL;
}
prepare_before_alloc(&gfp_mask);
if (!prepare_before_alloc(&gfp_mask, order)) {
mem_reliable_out_of_memory(gfp_mask, order, preferred_nid,
nodemask);
goto out;
}
alloc_mask = gfp_mask;
if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
......
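As a concrete illustration of the check added in mem_reliable.h, the
standalone (non-kernel) sketch below mirrors reliable_mem_limit_check()
with an assumed 4 KiB page size and a 1 GiB limit; all values are
illustrative, not taken from the patch:

```c
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL /* assumed 4 KiB pages for the example */

static unsigned long task_reliable_limit = 1UL << 30;	/* 1 GiB limit */
static long reliable_user_used_nr_page;			/* pages in use */

/* Mirror of the inline helper: the limit is in bytes, usage is in pages. */
static bool reliable_mem_limit_check(unsigned long nr_page)
{
	return reliable_user_used_nr_page + nr_page <=
	       task_reliable_limit / PAGE_SIZE;
}

int main(void)
{
	/* 1 GiB / 4 KiB = 262144 pages may be used at most. */
	reliable_user_used_nr_page = 262000;

	/* 262000 + 512 > 262144: a 2 MiB THP (512 pages) is rejected. */
	printf("THP allowed: %d\n", reliable_mem_limit_check(512));

	/* 262000 + 1 <= 262144: a single 4 KiB page still fits. */
	printf("single page allowed: %d\n", reliable_mem_limit_check(1));

	return 0;
}
```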