Commit 3023a4b3 authored by Ma Wupeng, committed by Yang Yingliang

mm: Introduce fallback mechanism for memory reliable

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA

--------------------------------

Introduce a fallback mechanism for memory reliable. The following will fall
back to the non-mirrored region if their allocation from the mirrored
region fails:

- User tasks with reliable flag
- thp collapse pages
- init tasks
- pagecache
- tmpfs

To achieve this, the buddy system falls back to the non-mirrored region in
the following situations:

- If __GFP_THISNODE is set in gfp_mask and the destination nodes do not have
  any eligible zones available.

- high_zoneidx is raised to ZONE_MOVABLE to allocate memory before going into
  the OOM path.

This mechanism is enabled by default and can be disabled by adding
"reliable_debug=F" to the kernel parameters. It relies on
CONFIG_MEMORY_RELIABLE and requires "kernelcore=reliable" in the kernel
parameters.
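
An illustrative boot command line that enables memory reliable but turns this
fallback off (other parameters omitted):

    kernelcore=reliable reliable_debug=F
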
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Parent 1845e7ad
@@ -1972,6 +1972,15 @@
                        Option "reliable" is base on option "mirror", but make
                        some extension. These two features are alternatives.
 
+        reliable_debug= [ARM64]
+                        Format: [F]
+                        Only works with CONFIG_MEMORY_RELIABLE and
+                        "kernelcore=reliable" is configured.
+                        F: User tasks with PF_RELIABLE will not allocate
+                        memory from non-mirrored region if this allocation
+                        from mirrored region failed.
+                        Pagecache and tmpfs will follow this rule too.
+
         kgdbdbgp=       [KGDB,HW] kgdb over EHCI usb debug port.
                         Format: <Controller#>[,poll interval]
                         The controller # is the number of the ehci usb debug
@@ -16,6 +16,7 @@ extern struct static_key_false mem_reliable;
 extern bool reliable_enabled;
 extern atomic_long_t reliable_user_used_nr_page;
 extern unsigned long task_reliable_limit __read_mostly;
+extern bool reliable_allow_fallback;
 
 extern void add_reliable_mem_size(long sz);
 extern void mem_reliable_init(bool has_unmirrored_mem,
@@ -69,8 +70,14 @@ static inline bool reliable_mem_limit_check(unsigned long nr_page)
         return atomic_long_read(&reliable_user_used_nr_page) + nr_page <=
                task_reliable_limit / PAGE_SIZE;
 }
 
+static inline bool reliable_allow_fb_enabled(void)
+{
+        return reliable_allow_fallback;
+}
+
 #else
 #define reliable_enabled 0
+#define reliable_allow_fb_enabled() false
 
 static inline bool mem_reliable_is_enabled(void) { return false; }
 static inline void add_reliable_mem_size(long sz) {}
@@ -1235,10 +1235,15 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 out_unmap:
         pte_unmap_unlock(pte, ptl);
         if (ret) {
-                if (reliable &&
-                    !reliable_mem_limit_check(1 << HPAGE_PMD_ORDER)) {
-                        ret = SCAN_ALLOC_HUGE_PAGE_FAIL;
-                        goto out;
+                if (reliable) {
+                        if (!reliable_mem_limit_check(1 << HPAGE_PMD_ORDER)) {
+                                if (reliable_allow_fb_enabled()) {
+                                        reliable = false;
+                                } else {
+                                        ret = SCAN_ALLOC_HUGE_PAGE_FAIL;
+                                        goto out;
+                                }
+                        }
                 }
                 node = khugepaged_find_target_node();
@@ -1695,15 +1700,20 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
         rcu_read_unlock();
 
         if (result == SCAN_SUCCEED) {
+                if (reliable) {
+                        if (!reliable_mem_limit_check(1 << HPAGE_PMD_ORDER)) {
+                                if (reliable_allow_fb_enabled()) {
+                                        reliable = false;
+                                } else {
+                                        result = SCAN_ALLOC_HUGE_PAGE_FAIL;
+                                        goto out;
+                                }
+                        }
+                }
                 if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
                         result = SCAN_EXCEED_NONE_PTE;
                 } else {
-                        if (reliable &&
-                            !reliable_mem_limit_check(1 << HPAGE_PMD_ORDER)) {
-                                result = SCAN_ALLOC_HUGE_PAGE_FAIL;
-                                goto out;
-                        }
                         node = khugepaged_find_target_node();
                         collapse_shmem(mm, mapping, start, hpage, node,
                                        reliable);
@@ -17,6 +17,7 @@ static atomic_long_t total_reliable_mem;
 atomic_long_t reliable_user_used_nr_page;
 /* reliable user limit for user tasks with reliable flag */
 unsigned long task_reliable_limit = ULONG_MAX;
+bool reliable_allow_fallback __read_mostly = true;
 
 void add_reliable_mem_size(long sz)
 {
@@ -204,3 +205,31 @@ void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order,
         out_of_memory(&oc);
         mutex_unlock(&oom_lock);
 }
+
+static int __init setup_reliable_debug(char *str)
+{
+        if (*str++ != '=' || !*str)
+                /*
+                 * No options specified.
+                 */
+                goto out;
+
+        /*
+         * Determine which debug features should be switched on
+         */
+        for (; *str && *str != ','; str++) {
+                switch (*str) {
+                case 'F':
+                        reliable_allow_fallback = false;
+                        pr_info("fallback disabled.");
+                        break;
+                default:
+                        pr_err("reliable_debug option '%c' unknown. skipped\n",
+                               *str);
+                }
+        }
+
+out:
+        return 1;
+}
+__setup("reliable_debug", setup_reliable_debug);
@@ -3656,6 +3656,60 @@ __alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order,
         return page;
 }
 
+#ifdef CONFIG_MEMORY_RELIABLE
+static inline struct zone *reliable_fb_find_zone(gfp_t gfp_mask,
+                                                 struct alloc_context *ac)
+{
+        if (!reliable_allow_fb_enabled())
+                return NULL;
+
+        /* dst nodemask may don't have zone we want, fallback here */
+        if ((gfp_mask & __GFP_THISNODE) && (ac->high_zoneidx == ZONE_NORMAL) &&
+            (gfp_mask & ___GFP_RELIABILITY)) {
+                struct zoneref *ref = first_zones_zonelist(
+                        ac->zonelist, ZONE_MOVABLE, ac->nodemask);
+
+                return ref->zone;
+        }
+
+        return NULL;
+}
+
+static inline struct page *
+reliable_fb_before_oom(gfp_t gfp_mask, int order,
+                       const struct alloc_context *ac)
+{
+        if (!reliable_allow_fb_enabled())
+                return NULL;
+
+        /* key user process alloc mem from movable zone to avoid oom */
+        if ((ac->high_zoneidx == ZONE_NORMAL) &&
+            (gfp_mask & ___GFP_RELIABILITY)) {
+                struct alloc_context tmp_ac = *ac;
+
+                tmp_ac.high_zoneidx = ZONE_MOVABLE;
+                tmp_ac.preferred_zoneref = first_zones_zonelist(
+                        ac->zonelist, ZONE_MOVABLE, ac->nodemask);
+
+                return get_page_from_freelist(
+                        (gfp_mask | __GFP_HARDWALL) & ~__GFP_DIRECT_RECLAIM,
+                        order, ALLOC_WMARK_HIGH | ALLOC_CPUSET, &tmp_ac);
+        }
+
+        return NULL;
+}
+#else
+static inline struct zone *reliable_fb_find_zone(gfp_t gfp_mask,
+                                                 struct alloc_context *ac)
+{
+        return NULL;
+}
+
+static inline struct page *reliable_fb_before_oom(gfp_t gfp_mask, int order,
+                                                  const struct alloc_context *ac)
+{
+        return NULL;
+}
+#endif
+
 static inline struct page *
 __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         const struct alloc_context *ac, unsigned long *did_some_progress)
@@ -3694,6 +3748,10 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         if (page)
                 goto out;
 
+        page = reliable_fb_before_oom(gfp_mask, order, ac);
+        if (page)
+                goto out;
+
         /* Coredumps can quickly deplete all memory reserves */
         if (current->flags & PF_DUMPCORE)
                 goto out;
@@ -4301,8 +4359,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
          */
         ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
                                         ac->high_zoneidx, ac->nodemask);
-        if (!ac->preferred_zoneref->zone)
-                goto nopage;
+        if (!ac->preferred_zoneref->zone) {
+                ac->preferred_zoneref->zone =
+                        reliable_fb_find_zone(gfp_mask, ac);
+
+                if (!ac->preferred_zoneref->zone)
+                        goto nopage;
+        }
 
         if (gfp_mask & __GFP_KSWAPD_RECLAIM)
                 wake_all_kswapds(order, gfp_mask, ac);
@@ -4602,6 +4665,9 @@ static inline bool prepare_before_alloc(gfp_t *gfp_mask, unsigned int order)
                 return true;
         }
 
+        if (reliable_allow_fb_enabled())
+                return true;
+
         return false;
 }
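
Taken together, the allocator changes gate the fallback on three conditions:
the fallback switch is on, the allocation carries ___GFP_RELIABILITY, and the
request is currently confined to ZONE_NORMAL (the mirrored region). The sketch
below is a minimal user-space model of that gate under those assumptions, not
kernel code; GFP_RELIABLE, zone_idx and may_fallback() are illustrative
stand-ins.

/*
 * Minimal model of the fallback gate introduced by this patch.
 * All identifiers here are stand-ins, not kernel symbols.
 */
#include <stdbool.h>
#include <stdio.h>

enum zone_idx { ZONE_NORMAL_IDX, ZONE_MOVABLE_IDX };

#define GFP_RELIABLE 0x1                        /* stands in for ___GFP_RELIABILITY */

static bool reliable_allow_fallback = true;     /* default; cleared by "reliable_debug=F" */

/* May a failed mirrored allocation retry from the non-mirrored (movable) region? */
static bool may_fallback(unsigned int gfp_mask, enum zone_idx high_zoneidx)
{
        if (!reliable_allow_fallback)
                return false;
        return (gfp_mask & GFP_RELIABLE) && high_zoneidx == ZONE_NORMAL_IDX;
}

int main(void)
{
        printf("fallback allowed: %d\n", may_fallback(GFP_RELIABLE, ZONE_NORMAL_IDX));
        reliable_allow_fallback = false;        /* as if booted with reliable_debug=F */
        printf("fallback allowed: %d\n", may_fallback(GFP_RELIABLE, ZONE_NORMAL_IDX));
        return 0;
}

When the gate passes, reliable_fb_find_zone() retargets the zonelist walk at
ZONE_MOVABLE, and reliable_fb_before_oom() retries get_page_from_freelist()
against ZONE_MOVABLE with the high watermark and without direct reclaim before
the OOM path is taken.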