Commit 6c59ddf2 authored by Ma Wupeng, committed by Zheng Zengkai

mm: Introduce memory reliable

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4PM01
CVE: NA

--------------------------------

Introduction
============

The memory reliable feature is a memory tiering mechanism. It is based on
the kernel mirror feature, which splits memory into two separate regions:
a mirrored (reliable) region and a non-mirrored (non-reliable) region.

For the kernel mirror feature:

- kernel memory is allocated from the mirrored region by default
- user memory is allocated from the non-mirrored region by default

The non-mirrored region is arranged into ZONE_MOVABLE.

The memory reliable feature adds the following on top of that:

- normal user tasks never allocate memory from the mirrored region via
  userspace APIs (malloc, mmap, etc.)
- special user tasks allocate memory from the mirrored region by default
- tmpfs and pagecache allocate memory from the mirrored region by default
- an upper limit on the mirrored memory allocated for user tasks, tmpfs
  and pagecache

A reliable fallback mechanism is also supported, which allows special user
tasks, tmpfs and pagecache to fall back to the non-mirrored region; this is
the default setting.

To achieve this:

- the ___GFP_RELIABLE flag is added to request memory from the mirrored
  region.

- the high_zoneidx for special user tasks/tmpfs/pagecache is set to
  ZONE_NORMAL.

- normal user tasks can only allocate from ZONE_MOVABLE.
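
The following sketch (illustrative only, not part of this patch; it only
assumes the GFP_RELIABLE flag introduced below) shows the intended
allocation paths:

#include <linux/gfp.h>

/*
 * Illustration: an allocation tagged GFP_RELIABLE is redirected from
 * ZONE_MOVABLE to ZONE_NORMAL by gfp_zone(), i.e. it is served from the
 * mirrored (reliable) region.
 */
static struct page *reliable_alloc_example(void)
{
	/* movable, user-style allocation, but from the mirrored region */
	return alloc_pages(GFP_HIGHUSER_MOVABLE | GFP_RELIABLE, 0);
}

/*
 * A plain GFP_HIGHUSER_MOVABLE allocation keeps targeting ZONE_MOVABLE,
 * i.e. the non-mirrored region, which is all normal user tasks get.
 */
static struct page *non_reliable_alloc_example(void)
{
	return alloc_pages(GFP_HIGHUSER_MOVABLE, 0);
}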

This patch only introduces the main framework; memory reliable support for
special user tasks, pagecache and tmpfs comes in its own follow-up patches.

To enable this feature, mirrored (reliable) memory is required and
"kernelcore=reliable" should be added to the kernel parameters.

Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Parent 856090e5
@@ -2227,7 +2227,7 @@
	keepinitrd	[HW,ARM]

	kernelcore=	[KNL,X86,IA-64,PPC,ARM64]
-		Format: nn[KMGTPE] | nn% | "mirror"
+		Format: nn[KMGTPE] | nn% | "mirror" | "reliable"
		This parameter specifies the amount of memory usable by
		the kernel for non-movable allocations. The requested
		amount is spread evenly throughout all nodes in the

@@ -2251,6 +2251,9 @@
		for Movable pages. "nn[KMGTPE]", "nn%", and "mirror"
		are exclusive, so you cannot specify multiple forms.
Option "reliable" is base on option "mirror", but make
some extension. These two features are alternatives.
	kgdbdbgp=	[KGDB,HW] kgdb over EHCI usb debug port.
		Format: <Controller#>[,poll interval]
		The controller # is the number of the ehci usb debug
......
@@ -39,7 +39,7 @@ struct vm_area_struct;
#define ___GFP_HARDWALL		0x100000u
#define ___GFP_THISNODE		0x200000u
#define ___GFP_ACCOUNT		0x400000u
-#define ___GFP_RESERVE_0	0x800000u
+#define ___GFP_RELIABLE		0x800000u
#define ___GFP_RESERVE_1	0x1000000u
#ifdef CONFIG_LOCKDEP
#define ___GFP_NOLOCKDEP	0x2000000u

@@ -225,8 +225,10 @@ struct vm_area_struct;
/* Disable lockdep for GFP context tracking */
#define __GFP_NOLOCKDEP	((__force gfp_t)___GFP_NOLOCKDEP)
-/* Reserve 2 flags for future usage */
-#define __GFP_RESERVE_0	((__force gfp_t)___GFP_RESERVE_0)
+/* Alloc memory from mirrored region */
+#define __GFP_RELIABLE	((__force gfp_t)___GFP_RELIABLE)
+
+/* Reserve 1 flags for future usage */
#define __GFP_RESERVE_1	((__force gfp_t)___GFP_RESERVE_1)

/* Room for N __GFP_FOO bits */

@@ -315,6 +317,7 @@ struct vm_area_struct;
#define GFP_TRANSHUGE_LIGHT	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
			 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
#define GFP_TRANSHUGE	(GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
+#define GFP_RELIABLE	__GFP_RELIABLE

/* Convert GFP flags to their corresponding migrate type */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)

@@ -461,6 +464,12 @@ static inline enum zone_type gfp_zone(gfp_t flags)
	z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) &
		((1 << GFP_ZONES_SHIFT) - 1);
	VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
+
+#ifdef CONFIG_MEMORY_RELIABLE
+	if (z == ZONE_MOVABLE && (flags & GFP_RELIABLE))
+		return ZONE_NORMAL;
+#endif
+
	return z;
}
......
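
For reference, the expected effect of the gfp_zone() change above, written
out as a small sketch (illustrative only; the function and the WARN_ONs are
not part of the patch, they simply encode the expected zone for each flag
combination, assuming CONFIG_MEMORY_RELIABLE is enabled):

#include <linux/gfp.h>
#include <linux/bug.h>

static void gfp_zone_reliable_expectations(void)
{
	/* Plain kernel allocations are unaffected: still ZONE_NORMAL. */
	WARN_ON(gfp_zone(GFP_KERNEL) != ZONE_NORMAL);

	/* Movable user allocations stay in ZONE_MOVABLE (non-mirrored). */
	WARN_ON(gfp_zone(GFP_HIGHUSER_MOVABLE) != ZONE_MOVABLE);

	/* The same allocation tagged reliable is redirected to ZONE_NORMAL,
	 * i.e. the mirrored region. */
	WARN_ON(gfp_zone(GFP_HIGHUSER_MOVABLE | GFP_RELIABLE) != ZONE_NORMAL);
}
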
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MM_MEM_RELIABLE__
#define __MM_MEM_RELIABLE__

#include <linux/stddef.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>
#include <linux/mm_types.h>
#include <linux/sched.h>

#ifdef CONFIG_MEMORY_RELIABLE

extern struct static_key_false mem_reliable;

extern bool reliable_enabled;

extern void add_reliable_mem_size(long sz);
extern void mem_reliable_init(bool has_unmirrored_mem,
			      unsigned long *zone_movable_pfn);

static inline bool mem_reliable_is_enabled(void)
{
	return static_branch_likely(&mem_reliable);
}

static inline bool zone_reliable(struct zone *zone)
{
	return mem_reliable_is_enabled() && zone_idx(zone) < ZONE_MOVABLE;
}

static inline bool skip_none_movable_zone(gfp_t gfp, struct zoneref *z)
{
	if (!mem_reliable_is_enabled())
		return false;

	if (!current->mm || (current->flags & PF_KTHREAD))
		return false;

	/* user tasks can only alloc memory from non-mirrored region */
	if (!(gfp & GFP_RELIABLE) && (gfp & __GFP_HIGHMEM) &&
	    (gfp & __GFP_MOVABLE)) {
		if (zonelist_zone_idx(z) < ZONE_MOVABLE)
			return true;
	}

	return false;
}
#else
#define reliable_enabled 0

static inline bool mem_reliable_is_enabled(void) { return false; }
static inline void add_reliable_mem_size(long sz) {}
static inline void mem_reliable_init(bool has_unmirrored_mem,
				     unsigned long *zone_movable_pfn) {}
static inline bool zone_reliable(struct zone *zone) { return false; }
static inline bool skip_none_movable_zone(gfp_t gfp, struct zoneref *z)
{
	return false;
}
#endif

#endif
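
As a companion to the gfp_zone() sketch earlier, a hedged sketch of how
skip_none_movable_zone() above is expected to behave (illustrative only,
not part of the patch; it assumes the feature is enabled, current is a
normal user task with an mm and no PF_KTHREAD, and z references a zone
below ZONE_MOVABLE):

#include <linux/mem_reliable.h>
#include <linux/bug.h>

static void skip_zone_expectations(struct zoneref *z)
{
	/* A movable, non-reliable user allocation must skip this zone, so
	 * normal user tasks end up allocating only from ZONE_MOVABLE. */
	WARN_ON(!skip_none_movable_zone(GFP_HIGHUSER_MOVABLE, z));

	/* The same request tagged GFP_RELIABLE may use the mirrored zone. */
	WARN_ON(skip_none_movable_zone(GFP_HIGHUSER_MOVABLE | GFP_RELIABLE, z));

	/* Kernel-style allocations (no __GFP_HIGHMEM/__GFP_MOVABLE) are
	 * never skipped either. */
	WARN_ON(skip_none_movable_zone(GFP_KERNEL, z));
}
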
@@ -34,6 +34,9 @@
#include <linux/pgtable.h>
#include <linux/kabi.h>

+/* added to mm.h to avoid every caller adding new header file */
+#include <linux/mem_reliable.h>
+
struct mempolicy;
struct anon_vma;
struct anon_vma_chain;
......
@@ -49,7 +49,7 @@
	{(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \
	{(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\
	{(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\
-	{(unsigned long)__GFP_RESERVE_0, "__GFP_RESERVE_0"}, \
+	{(unsigned long)__GFP_RELIABLE, "__GFP_RELIABLE"}, \
	{(unsigned long)__GFP_RESERVE_1, "__GFP_RESERVE_1"} \

#define show_gfp_flags(flags) \
......
@@ -951,6 +951,24 @@ config PID_RESERVE
	  and reserve the pids before init task start. In restore process,
	  free the reserved pids and realloc them for use.

+config MEMORY_RELIABLE
+	bool "Support for memory reliable"
+	depends on NEED_MULTIPLE_NODES
+	depends on ARM64
+	default n
+	help
+	  Memory reliable is based on mirrored memory. It has the following
+	  additional features:
+	  a) normal user tasks never allocate memory from the mirrored region;
+	  b) special user tasks allocate memory from the mirrored region by
+	  default; c) an upper limit on the mirrored memory allocated for
+	  user tasks, tmpfs and pagecache.
+	  Special user tasks and tmpfs/pagecache can fall back to the
+	  non-mirrored region if the reliable fallback mechanism is enabled.
+
+	  To enable this feature, mirrored memory is needed and
+	  "kernelcore=reliable" needs to be added to the kernel parameters.
+
source "mm/damon/Kconfig"

endmenu
@@ -128,3 +128,4 @@ obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o
obj-$(CONFIG_PIN_MEMORY) += pin_mem.o
obj-$(CONFIG_SHRINK_PAGECACHE) += page_cache_limit.o
obj-$(CONFIG_ASCEND_SHARE_POOL) += share_pool.o
+obj-$(CONFIG_MEMORY_RELIABLE) += mem_reliable.o
// SPDX-License-Identifier: GPL-2.0-only

#define pr_fmt(fmt) "mem reliable: " fmt

#include <linux/mm.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>

DEFINE_STATIC_KEY_FALSE(mem_reliable);

bool reliable_enabled;

static atomic_long_t total_reliable_mem;

void add_reliable_mem_size(long sz)
{
	atomic_long_add(sz, &total_reliable_mem);
}

static unsigned long total_reliable_mem_sz(void)
{
	return atomic_long_read(&total_reliable_mem);
}

static int reliable_mem_notifier(struct notifier_block *nb,
				 unsigned long action, void *arg)
{
	struct memory_notify *m_arg = arg;
	struct zone *zone;

	switch (action) {
	case MEM_ONLINE:
		zone = page_zone(pfn_to_page(m_arg->start_pfn));
		if (zone_reliable(zone))
			add_reliable_mem_size(m_arg->nr_pages * PAGE_SIZE);
		break;
	case MEM_OFFLINE:
		zone = page_zone(pfn_to_page(m_arg->start_pfn));
		if (zone_reliable(zone))
			add_reliable_mem_size(-m_arg->nr_pages * PAGE_SIZE);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block reliable_notifier_block = {
	.notifier_call = reliable_mem_notifier,
};

void mem_reliable_init(bool has_unmirrored_mem, unsigned long *zone_movable_pfn)
{
	if (!reliable_enabled)
		return;

	if (atomic_long_read(&total_reliable_mem) == 0) {
		memset(zone_movable_pfn, 0,
		       sizeof(unsigned long) * MAX_NUMNODES);
		pr_err("init failed, mirrored memory size is zero.\n");
		return;
	}

	if (!has_unmirrored_mem) {
		pr_err("init failed, unmirrored memory size is zero.\n");
		return;
	}

	if (register_hotmemory_notifier(&reliable_notifier_block)) {
		pr_err("init failed, register memory notifier failed.\n");
		return;
	}

	static_branch_enable(&mem_reliable);

	pr_info("init succeed, mirrored memory size(%lu)\n",
		total_reliable_mem_sz());
}
@@ -3840,6 +3840,10 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
		struct page *page;
		unsigned long mark;

+		/* skip non-movable zone for normal user tasks */
+		if (skip_none_movable_zone(gfp_mask, z))
+			continue;
+
		/*
		 * CDM nodes get skipped if the requested gfp flag
		 * does not have __GFP_THISNODE set or the nodemask

@@ -7494,10 +7498,13 @@ static void __init find_zone_movable_pfns_for_nodes(void)
	 */
	if (mirrored_kernelcore) {
		bool mem_below_4gb_not_mirrored = false;
+		bool has_unmirrored_mem = false;

		for_each_mem_region(r) {
-			if (memblock_is_mirror(r))
+			if (memblock_is_mirror(r)) {
+				add_reliable_mem_size(r->size);
				continue;
+			}

			nid = memblock_get_region_node(r);

@@ -7508,6 +7515,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
				continue;
			}

+			has_unmirrored_mem = true;
			zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
				min(usable_startpfn, zone_movable_pfn[nid]) :
				usable_startpfn;

@@ -7516,6 +7524,8 @@
		if (mem_below_4gb_not_mirrored)
			pr_warn("This configuration results in unmirrored kernel memory.\n");

+		mem_reliable_init(has_unmirrored_mem, zone_movable_pfn);
+
		goto out2;
	}

@@ -7827,10 +7837,29 @@ static int __init cmdline_parse_kernelcore(char *p)
{
	/* parse kernelcore=mirror */
	if (parse_option_str(p, "mirror")) {
+		if (reliable_enabled) {
+			pr_info("kernelcore=reliable and kernelcore=mirror are alternative.\n");
+			return -EINVAL;
+		}
+
		mirrored_kernelcore = true;
		return 0;
	}

+#ifdef CONFIG_MEMORY_RELIABLE
+	/* parse kernelcore=reliable */
+	if (parse_option_str(p, "reliable")) {
+		if (!reliable_enabled && mirrored_kernelcore) {
+			pr_info("kernelcore=mirror and kernelcore=reliable are alternative.\n");
+			return -EINVAL;
+		}
+
+		reliable_enabled = true;
+		mirrored_kernelcore = true;
+		return 0;
+	}
+#endif
+
	return cmdline_parse_core(p, &required_kernelcore,
				  &required_kernelcore_percent);
}
......
@@ -660,7 +660,7 @@ static const struct {
	{ "__GFP_RECLAIM", "R" },
	{ "__GFP_DIRECT_RECLAIM", "DR" },
	{ "__GFP_KSWAPD_RECLAIM", "KR" },
-	{ "__GFP_RESERVE_0", "RE0" },
+	{ "__GFP_RELIABLE", "REL" },
	{ "__GFP_RESERVE_1", "RE1" },
};
......