Commit 3023a4b3 authored by Ma Wupeng, committed by Yang Yingliang

mm: Introduce fallback mechanism for memory reliable

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA

--------------------------------

Introduce a fallback mechanism for memory reliable. The following allocations
will fall back to the non-mirrored region if their allocation from the
mirrored region fails:

- User tasks with reliable flag
- thp collapse pages
- init tasks
- pagecache
- tmpfs

In order to achieve this goal, the buddy system will fall back to the
non-mirrored region in the following situations:

- if __GFP_THISNODE is set in gfp_mask and the destination nodes do not have
  any suitable zones available

- high_zoneidx is raised to ZONE_MOVABLE to allocate memory before going OOM

This mechanism is enabled by default and can be disabled by adding
"reliable_debug=F" to the kernel parameters. It relies on
CONFIG_MEMORY_RELIABLE and requires "kernelcore=reliable" in the kernel
parameters.
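
For illustration, typical boot command lines would look roughly like this
(a sketch; it assumes the platform already provides the mirrored memory
that "kernelcore=reliable" builds on):

    # memory reliable enabled, fallback to non-mirrored region allowed (default)
    kernelcore=reliable

    # memory reliable enabled, fallback to non-mirrored region disabled
    kernelcore=reliable reliable_debug=F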
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Parent 1845e7ad
@@ -1972,6 +1972,15 @@
                         Option "reliable" is base on option "mirror", but make
                         some extension. These two features are alternatives.
 
+        reliable_debug= [ARM64]
+                        Format: [F]
+                        Only works with CONFIG_MEMORY_RELIABLE and
+                        "kernelcore=reliable" is configured.
+                        F: User tasks with PF_RELIABLE will not allocate
+                        memory from non-mirrored region if this allocation
+                        from mirrored region failed.
+                        Pagecache and tmpfs will follow this rule too.
+
         kgdbdbgp=       [KGDB,HW] kgdb over EHCI usb debug port.
                         Format: <Controller#>[,poll interval]
                         The controller # is the number of the ehci usb debug
......
@@ -16,6 +16,7 @@ extern struct static_key_false mem_reliable;
 extern bool reliable_enabled;
 extern atomic_long_t reliable_user_used_nr_page;
 extern unsigned long task_reliable_limit __read_mostly;
+extern bool reliable_allow_fallback;
 
 extern void add_reliable_mem_size(long sz);
 extern void mem_reliable_init(bool has_unmirrored_mem,
@@ -69,8 +70,14 @@ static inline bool reliable_mem_limit_check(unsigned long nr_page)
         return atomic_long_read(&reliable_user_used_nr_page) + nr_page <=
                task_reliable_limit / PAGE_SIZE;
 }
+
+static inline bool reliable_allow_fb_enabled(void)
+{
+        return reliable_allow_fallback;
+}
 #else
 #define reliable_enabled 0
+#define reliable_allow_fb_enabled() false
 
 static inline bool mem_reliable_is_enabled(void) { return false; }
 static inline void add_reliable_mem_size(long sz) {}
......
@@ -1235,10 +1235,15 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 out_unmap:
         pte_unmap_unlock(pte, ptl);
         if (ret) {
-                if (reliable &&
-                    !reliable_mem_limit_check(1 << HPAGE_PMD_ORDER)) {
-                        ret = SCAN_ALLOC_HUGE_PAGE_FAIL;
-                        goto out;
+                if (reliable) {
+                        if (!reliable_mem_limit_check(1 << HPAGE_PMD_ORDER)) {
+                                if (reliable_allow_fb_enabled()) {
+                                        reliable = false;
+                                } else {
+                                        ret = SCAN_ALLOC_HUGE_PAGE_FAIL;
+                                        goto out;
+                                }
+                        }
                 }
 
                 node = khugepaged_find_target_node();
@@ -1695,15 +1700,20 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
         rcu_read_unlock();
 
         if (result == SCAN_SUCCEED) {
+                if (reliable) {
+                        if (!reliable_mem_limit_check(1 << HPAGE_PMD_ORDER)) {
+                                if (reliable_allow_fb_enabled()) {
+                                        reliable = false;
+                                } else {
+                                        result = SCAN_ALLOC_HUGE_PAGE_FAIL;
+                                        goto out;
+                                }
+                        }
+                }
+
                 if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
                         result = SCAN_EXCEED_NONE_PTE;
                 } else {
-                        if (reliable &&
-                            !reliable_mem_limit_check(1 << HPAGE_PMD_ORDER)) {
-                                result = SCAN_ALLOC_HUGE_PAGE_FAIL;
-                                goto out;
-                        }
                         node = khugepaged_find_target_node();
                         collapse_shmem(mm, mapping, start, hpage, node,
                                 reliable);
......
@@ -17,6 +17,7 @@ static atomic_long_t total_reliable_mem;
 atomic_long_t reliable_user_used_nr_page;
 /* reliable user limit for user tasks with reliable flag */
 unsigned long task_reliable_limit = ULONG_MAX;
+bool reliable_allow_fallback __read_mostly = true;
 
 void add_reliable_mem_size(long sz)
 {
@@ -204,3 +205,31 @@ void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order,
         out_of_memory(&oc);
         mutex_unlock(&oom_lock);
 }
+
+static int __init setup_reliable_debug(char *str)
+{
+        if (*str++ != '=' || !*str)
+                /*
+                 * No options specified.
+                 */
+                goto out;
+
+        /*
+         * Determine which debug features should be switched on
+         */
+        for (; *str && *str != ','; str++) {
+                switch (*str) {
+                case 'F':
+                        reliable_allow_fallback = false;
+                        pr_info("fallback disabled.");
+                        break;
+                default:
+                        pr_err("reliable_debug option '%c' unknown. skipped\n",
+                               *str);
+                }
+        }
+
+out:
+        return 1;
+}
+__setup("reliable_debug", setup_reliable_debug);
@@ -3656,6 +3656,60 @@ __alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order,
         return page;
 }
 
+#ifdef CONFIG_MEMORY_RELIABLE
+static inline struct zone *reliable_fb_find_zone(gfp_t gfp_mask,
+                                                 struct alloc_context *ac)
+{
+        if (!reliable_allow_fb_enabled())
+                return NULL;
+
+        /* dst nodemask may don't have zone we want, fallback here */
+        if ((gfp_mask & __GFP_THISNODE) && (ac->high_zoneidx == ZONE_NORMAL) &&
+            (gfp_mask & ___GFP_RELIABILITY)) {
+                struct zoneref *ref = first_zones_zonelist(
+                        ac->zonelist, ZONE_MOVABLE, ac->nodemask);
+
+                return ref->zone;
+        }
+
+        return NULL;
+}
+
+static inline struct page *
+reliable_fb_before_oom(gfp_t gfp_mask, int order,
+                       const struct alloc_context *ac)
+{
+        if (!reliable_allow_fb_enabled())
+                return NULL;
+
+        /* key user process alloc mem from movable zone to avoid oom */
+        if ((ac->high_zoneidx == ZONE_NORMAL) &&
+            (gfp_mask & ___GFP_RELIABILITY)) {
+                struct alloc_context tmp_ac = *ac;
+
+                tmp_ac.high_zoneidx = ZONE_MOVABLE;
+                tmp_ac.preferred_zoneref = first_zones_zonelist(
+                        ac->zonelist, ZONE_MOVABLE, ac->nodemask);
+
+                return get_page_from_freelist(
+                        (gfp_mask | __GFP_HARDWALL) & ~__GFP_DIRECT_RECLAIM,
+                        order, ALLOC_WMARK_HIGH | ALLOC_CPUSET, &tmp_ac);
+        }
+
+        return NULL;
+}
+#else
+static inline struct zone *reliable_fb_find_zone(gfp_t gfp_mask,
+                                                 struct alloc_context *ac)
+{
+        return NULL;
+}
+
+static inline struct page *reliable_fb_before_oom(gfp_t gfp_mask, int order,
+                                                  const struct alloc_context *ac)
+{
+        return NULL;
+}
+#endif
+
 static inline struct page *
 __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         const struct alloc_context *ac, unsigned long *did_some_progress)
@@ -3694,6 +3748,10 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         if (page)
                 goto out;
 
+        page = reliable_fb_before_oom(gfp_mask, order, ac);
+        if (page)
+                goto out;
+
         /* Coredumps can quickly deplete all memory reserves */
         if (current->flags & PF_DUMPCORE)
                 goto out;
@@ -4301,8 +4359,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
          */
         ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
                                         ac->high_zoneidx, ac->nodemask);
-        if (!ac->preferred_zoneref->zone)
-                goto nopage;
+        if (!ac->preferred_zoneref->zone) {
+                ac->preferred_zoneref->zone =
+                        reliable_fb_find_zone(gfp_mask, ac);
+
+                if (!ac->preferred_zoneref->zone)
+                        goto nopage;
+        }
 
         if (gfp_mask & __GFP_KSWAPD_RECLAIM)
                 wake_all_kswapds(order, gfp_mask, ac);
@@ -4602,6 +4665,9 @@ static inline bool prepare_before_alloc(gfp_t *gfp_mask, unsigned int order)
                 return true;
         }
 
+        if (reliable_allow_fb_enabled())
+                return true;
+
         return false;
 }
......