Commit 33d1f46a authored by Ma Wupeng, committed by Yang Yingliang

mm: Introduce memory reliable

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA

--------------------------------

Introduction
============

The memory reliable feature is a memory tiering mechanism. It is based on
the kernel mirror feature, which splits memory into two separate regions:
the mirrored (reliable) region and the non-mirrored (non-reliable) region.

For the kernel mirror feature:

- kernel memory is allocated from the mirrored region by default
- user memory is allocated from the non-mirrored region by default

The non-mirrored region is arranged into ZONE_MOVABLE.

The memory reliable feature adds the following on top of this:

- normal user tasks never allocate memory from the mirrored region via
  userspace APIs (malloc, mmap, etc.)
- special user tasks allocate memory from the mirrored region by default
- tmpfs/pagecache allocate memory from the mirrored region by default
- an upper limit on the mirrored memory allocated for user tasks, tmpfs
  and pagecache

A reliable fallback mechanism is also supported: special user tasks, tmpfs
and the pagecache may fall back to allocating from the non-mirrored region.
This is the default setting.
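
As a rough sketch of the idea (illustrative only, not the code in this
patch; the helper name and retry shape are assumptions), the fallback
amounts to retrying the allocation without the reliable flag:

    #include <linux/gfp.h>

    /* Illustrative fallback: try the mirrored region first, then fall
     * back to the non-mirrored (movable) region if that fails.
     */
    static struct page *alloc_pages_reliable(gfp_t gfp, unsigned int order)
    {
            struct page *page = alloc_pages(gfp | ___GFP_RELIABILITY, order);

            if (page)
                    return page;
            /* retry without the reliable flag, i.e. from ZONE_MOVABLE */
            return alloc_pages(gfp, order);
    }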

To fulfill these goals:

- the ___GFP_RELIABILITY flag is added to request memory from the
  mirrored region (see the usage sketch after this list).

- the high_zoneidx for special user tasks/tmpfs/pagecache is set to
  ZONE_NORMAL.

- normal user tasks can only allocate from ZONE_MOVABLE.
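
For example, a hypothetical in-kernel caller that wants a movable-style
allocation to come from the mirrored region would just OR in the new flag
(with CONFIG_MEMORY_RELIABLE disabled, ___GFP_RELIABILITY is 0 and this
degenerates into a plain allocation):

    /* Hypothetical caller: for this mask gfp_zone() normally returns
     * ZONE_MOVABLE; with ___GFP_RELIABILITY it returns ZONE_NORMAL, so
     * the page comes from the mirrored region.
     */
    struct page *page = alloc_pages(GFP_HIGHUSER_MOVABLE | ___GFP_RELIABILITY, 0);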

This patch only provides the main framework; memory reliable support for
special user tasks, pagecache and tmpfs comes in separate patches.

To enable this feature, mirrored (reliable) memory is required and
"kernelcore=reliable" must be added to the kernel command line.
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Parent 20942c83
@@ -1945,7 +1945,7 @@
keepinitrd [HW,ARM]
kernelcore= [KNL,X86,IA-64,PPC,ARM64]
Format: nn[KMGTPE] | nn% | "mirror"
Format: nn[KMGTPE] | nn% | "mirror" | "reliable"
This parameter specifies the amount of memory usable by
the kernel for non-movable allocations. The requested
amount is spread evenly throughout all nodes in the
@@ -1969,6 +1969,9 @@
for Movable pages. "nn[KMGTPE]", "nn%", and "mirror"
are exclusive, so you cannot specify multiple forms.
Option "reliable" is base on option "mirror", but make
some extension. These two features are alternatives.
kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port.
Format: <Controller#>[,poll interval]
The controller # is the number of the ehci usb debug
......
@@ -45,6 +45,12 @@ struct vm_area_struct;
#define ___GFP_NOLOCKDEP 0
#endif
/* If the above are modified, __GFP_BITS_SHIFT may need updating */
#ifdef CONFIG_MEMORY_RELIABLE
/* add the flag at the end of gfp_mask to avoid a KABI change */
#define ___GFP_RELIABILITY 0x40000000u
#else
#define ___GFP_RELIABILITY 0
#endif
/*
* Physical address zone modifiers (see linux/mmzone.h - low four bits)
@@ -446,6 +452,12 @@ static inline enum zone_type gfp_zone(gfp_t flags)
z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) &
((1 << GFP_ZONES_SHIFT) - 1);
VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
#ifdef CONFIG_MEMORY_RELIABLE
if (z == ZONE_MOVABLE && flags & ___GFP_RELIABILITY)
return ZONE_NORMAL;
#endif
return z;
}
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MM_MEM_RELIABLE__
#define __MM_MEM_RELIABLE__
#include <linux/stddef.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#ifdef CONFIG_MEMORY_RELIABLE
extern struct static_key_false mem_reliable;
extern bool reliable_enabled;
extern void add_reliable_mem_size(long sz);
extern void mem_reliable_init(bool has_unmirrored_mem,
unsigned long *zone_movable_pfn);
static inline bool mem_reliable_is_enabled(void)
{
return static_branch_likely(&mem_reliable);
}
static inline bool zone_reliable(struct zone *zone)
{
return mem_reliable_is_enabled() && zone_idx(zone) < ZONE_MOVABLE;
}
static inline bool skip_none_movable_zone(gfp_t gfp, struct zoneref *z)
{
if (!mem_reliable_is_enabled())
return false;
if (!current->mm || (current->flags & PF_KTHREAD))
return false;
/* user tasks can only alloc memory from non-mirrored region */
if (!(gfp & ___GFP_RELIABILITY) && (gfp & __GFP_HIGHMEM) &&
(gfp & __GFP_MOVABLE)) {
if (zonelist_zone_idx(z) < ZONE_MOVABLE)
return true;
}
return false;
}
#else
#define reliable_enabled 0
static inline bool mem_reliable_is_enabled(void) { return false; }
static inline void add_reliable_mem_size(long sz) {}
static inline void mem_reliable_init(bool has_unmirrored_mem,
unsigned long *zone_movable_pfn) {}
static inline bool zone_reliable(struct zone *zone) { return false; }
static inline bool skip_none_movable_zone(gfp_t gfp, struct zoneref *z)
{
return false;
}
#endif
#endif
@@ -28,6 +28,9 @@
#include <linux/memremap.h>
#include <linux/overflow.h>
/* added to mm.h to avoid every caller adding new header file */
#include <linux/mem_reliable.h>
struct mempolicy;
struct anon_vma;
struct anon_vma_chain;
......
@@ -831,4 +831,22 @@ config PID_RESERVE
We record the pid of dump task in the reserve memory,
and reserve the pids before init task start. In restore process,
free the reserved pids and realloc them for use.
config MEMORY_RELIABLE
bool "Support for memory reliable"
depends on ARM64
default n
help
  Memory reliable is based on mirrored memory. It has the following
  additional features:
  a) normal user tasks never allocate memory from the mirrored region;
  b) special user tasks allocate memory from the mirrored region by
     default;
  c) an upper limit on the mirrored memory allocated for user tasks,
     tmpfs and pagecache.
  Special user tasks and tmpfs/pagecache can fall back to the
  non-mirrored region if the reliable fallback mechanism is enabled.
  To enable this feature, mirrored memory is required and
  "kernelcore=reliable" needs to be added to the kernel parameters.
endmenu
@@ -109,3 +109,4 @@ obj-$(CONFIG_ASCEND_AUTO_TUNING_HUGEPAGE) += hugepage_tuning.o
obj-$(CONFIG_PIN_MEMORY) += pin_mem.o
obj-$(CONFIG_ASCEND_SHARE_POOL) += share_pool.o
obj-$(CONFIG_MEMCG_MEMFS_INFO) += memcg_memfs_info.o
obj-$(CONFIG_MEMORY_RELIABLE) += mem_reliable.o
// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "mem reliable: " fmt
#include <linux/mm.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
DEFINE_STATIC_KEY_FALSE(mem_reliable);
bool reliable_enabled;
static atomic_long_t total_reliable_mem;
void add_reliable_mem_size(long sz)
{
atomic_long_add(sz, &total_reliable_mem);
}
static int reliable_mem_notifier(struct notifier_block *nb,
unsigned long action, void *arg)
{
struct memory_notify *m_arg = arg;
struct zone *zone;
switch (action) {
case MEM_ONLINE:
zone = page_zone(pfn_to_page(m_arg->start_pfn));
if (zone_reliable(zone))
add_reliable_mem_size(m_arg->nr_pages * PAGE_SIZE);
break;
case MEM_OFFLINE:
zone = page_zone(pfn_to_page(m_arg->start_pfn));
if (zone_reliable(zone))
add_reliable_mem_size(-m_arg->nr_pages * PAGE_SIZE);
break;
default:
break;
}
return NOTIFY_OK;
}
static struct notifier_block reliable_notifier_block = {
.notifier_call = reliable_mem_notifier,
};
void mem_reliable_init(bool has_unmirrored_mem, unsigned long *zone_movable_pfn)
{
if (!reliable_enabled)
return;
if (atomic_long_read(&total_reliable_mem) == 0) {
memset(zone_movable_pfn, 0,
sizeof(unsigned long) * MAX_NUMNODES);
pr_err("init failed, mirrored memory size is zero.");
return;
}
if (!has_unmirrored_mem) {
pr_err("init failed, unmirrored memory size is zero.");
return;
}
if (register_hotmemory_notifier(&reliable_notifier_block)) {
pr_err("init failed, register memory notifier failed.");
return;
}
static_branch_enable(&mem_reliable);
pr_info("init succeed, mirrored memory size(%lu)",
atomic_long_read(&total_reliable_mem));
}
@@ -3454,6 +3454,10 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
struct page *page;
unsigned long mark;
/* skip non-movable zone for normal user tasks */
if (skip_none_movable_zone(gfp_mask, z))
continue;
/*
* CDM nodes get skipped if the requested gfp flag
* does not have __GFP_THISNODE set or the nodemask
@@ -4557,6 +4561,18 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
ac->high_zoneidx, ac->nodemask);
}
static inline void prepare_before_alloc(gfp_t *gfp_mask)
{
gfp_t gfp_ori = *gfp_mask;
*gfp_mask &= gfp_allowed_mask;
if (!mem_reliable_is_enabled())
return;
if (gfp_ori & ___GFP_RELIABILITY)
*gfp_mask |= ___GFP_RELIABILITY;
}
/*
* This is the 'heart' of the zoned buddy allocator.
*/
@@ -4578,7 +4594,8 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
return NULL;
}
gfp_mask &= gfp_allowed_mask;
prepare_before_alloc(&gfp_mask);
alloc_mask = gfp_mask;
if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
return NULL;
@@ -6912,10 +6929,13 @@ static void __init find_zone_movable_pfns_for_nodes(void)
*/
if (mirrored_kernelcore) {
bool mem_below_4gb_not_mirrored = false;
bool has_unmirrored_mem = false;
for_each_memblock(memory, r) {
if (memblock_is_mirror(r))
if (memblock_is_mirror(r)) {
add_reliable_mem_size(r->size);
continue;
}
nid = r->nid;
@@ -6926,6 +6946,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
continue;
}
has_unmirrored_mem = true;
zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
min(usable_startpfn, zone_movable_pfn[nid]) :
usable_startpfn;
@@ -6934,6 +6955,8 @@ static void __init find_zone_movable_pfns_for_nodes(void)
if (mem_below_4gb_not_mirrored)
pr_warn("This configuration results in unmirrored kernel memory.");
mem_reliable_init(has_unmirrored_mem, zone_movable_pfn);
goto out2;
}
@@ -7226,9 +7249,28 @@ static int __init cmdline_parse_kernelcore(char *p)
{
/* parse kernelcore=mirror */
if (parse_option_str(p, "mirror")) {
if (reliable_enabled) {
pr_info("kernelcore=reliable and kernelcore=mirror are alternative.");
return -EINVAL;
}
mirrored_kernelcore = true;
return 0;
}
#ifdef CONFIG_MEMORY_RELIABLE
/* parse kernelcore=reliable */
if (parse_option_str(p, "reliable")) {
if (!reliable_enabled && mirrored_kernelcore) {
pr_info("kernelcore=mirror and kernelcore=reliable are alternative.");
return -EINVAL;
}
reliable_enabled = true;
mirrored_kernelcore = true;
return 0;
}
#endif
return cmdline_parse_core(p, &required_kernelcore,
&required_kernelcore_percent);
......