提交 8b805498 编写于 作者: Y Yu Liao 提交者: Yang Yingliang

mm: add sysctl to clear free list pages

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA

--------------------------------

This patch adds a sysctl to clear the pages in the free lists of each
NUMA node. For each NUMA node, every page in its free lists is cleared;
this work is scheduled on a random CPU of that NUMA node.

When KASAN is enabled and pages are freed, their shadow memory is filled
with 0xFF, so writing to these free pages would trigger a KASAN
use-after-free report. Therefore KASAN is simply disabled for the clear
freelist code.
Signed-off-by: NYu Liao <liaoyu15@huawei.com>
Reviewed-by: NKefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: NYang Yingliang <yangyingliang@huawei.com>
上级 53d39163
...@@ -20,6 +20,7 @@ Currently, these files are in /proc/sys/vm: ...@@ -20,6 +20,7 @@ Currently, these files are in /proc/sys/vm:
- admin_reserve_kbytes - admin_reserve_kbytes
- block_dump - block_dump
- clear_freelist_pages
- compact_memory - compact_memory
- compact_unevictable_allowed - compact_unevictable_allowed
- dirty_background_bytes - dirty_background_bytes
...@@ -104,6 +105,18 @@ information on block I/O debugging is in Documentation/laptops/laptop-mode.txt. ...@@ -104,6 +105,18 @@ information on block I/O debugging is in Documentation/laptops/laptop-mode.txt.
============================================================== ==============================================================
clear_freelist_pages
Available only when CONFIG_CLEAR_FREELIST_PAGE is set. When 1 is written to the
file, all pages in free lists will be written with 0.
The zone lock is held during clear_freelist_pages; if execution takes too
long, RCU CPU stall warnings will be printed. For each NUMA node,
clear_freelist_pages is performed on a "random" CPU of that node.
The time consumed depends on the hardware.
==============================================================
compact_memory compact_memory
Available only when CONFIG_COMPACTION is set. When 1 is written to the file, Available only when CONFIG_COMPACTION is set. When 1 is written to the file,
......
...@@ -849,4 +849,11 @@ config MEMORY_RELIABLE ...@@ -849,4 +849,11 @@ config MEMORY_RELIABLE
To enable this function, mirrored memory is needed and To enable this function, mirrored memory is needed and
"kernelcore=reliable" need to be added in kernel parameters. "kernelcore=reliable" need to be added in kernel parameters.
config CLEAR_FREELIST_PAGE
bool "Support for clear free list pages"
depends on MMU && SYSCTL
default n
help
Say y here to enable the clear free list pages feature.
endmenu endmenu
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
KASAN_SANITIZE_slab_common.o := n KASAN_SANITIZE_slab_common.o := n
KASAN_SANITIZE_slab.o := n KASAN_SANITIZE_slab.o := n
KASAN_SANITIZE_slub.o := n KASAN_SANITIZE_slub.o := n
KASAN_SANITIZE_clear_freelist_page.o := n
# These files are disabled because they produce non-interesting and/or # These files are disabled because they produce non-interesting and/or
# flaky coverage that is not a function of syscall inputs. E.g. slab is out of # flaky coverage that is not a function of syscall inputs. E.g. slab is out of
...@@ -110,3 +111,4 @@ obj-$(CONFIG_PIN_MEMORY) += pin_mem.o ...@@ -110,3 +111,4 @@ obj-$(CONFIG_PIN_MEMORY) += pin_mem.o
obj-$(CONFIG_ASCEND_SHARE_POOL) += share_pool.o obj-$(CONFIG_ASCEND_SHARE_POOL) += share_pool.o
obj-$(CONFIG_MEMCG_MEMFS_INFO) += memcg_memfs_info.o obj-$(CONFIG_MEMCG_MEMFS_INFO) += memcg_memfs_info.o
obj-$(CONFIG_MEMORY_RELIABLE) += mem_reliable.o obj-$(CONFIG_MEMORY_RELIABLE) += mem_reliable.o
obj-$(CONFIG_CLEAR_FREELIST_PAGE) += clear_freelist_page.o
// SPDX-License-Identifier: GPL-2.0
/*
* Support for clear free list pages.
*/
#include <linux/mmzone.h>
#include <linux/mm_types.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/atomic.h>
#include <linux/nmi.h>
#include <linux/module.h>
/*
 * Iterate over the populated zones of a single NUMA node (@pgdat).
 *
 * Walks only the zones belonging to @pgdat (via next_pgdat_zone()),
 * skipping zones with no present pages. The empty if/else trick keeps
 * the populated_zone() filter inside the loop statement so the macro
 * still binds cleanly to a single trailing block, and a dangling
 * "else" from the caller cannot pair with the hidden "if".
 */
#define for_each_populated_zone_pgdat(pgdat, zone)		\
	for (zone = pgdat->node_zones;				\
	     zone;						\
	     zone = next_pgdat_zone(zone))			\
		if (!populated_zone(zone))			\
			; /* do nothing */			\
		else
/* Per-node unit of work: which pgdat to clear, queued on a CPU of that node. */
struct pgdat_entry {
	struct pglist_data *pgdat;
	struct work_struct work;
};

/* Woken by the last per-node worker to finish (see clear_pgdat_freelist_pages()). */
static DECLARE_WAIT_QUEUE_HEAD(clear_freelist_wait);
/* Serializes sysctl writers so only one clear pass runs at a time. */
static DEFINE_MUTEX(clear_freelist_lock);
/* Count of per-node work items still in flight for the current pass. */
static atomic_t clear_freelist_workers;
/* Total pages cleared in the current pass, reported via pr_debug(). */
static atomic_t clear_pages_num;
/* Used as both extra1 and extra2 so the sysctl only accepts the value 1. */
static int one = 1;
/*
 * next_pgdat_zone - helper magic for for_each_populated_zone_pgdat()
 *
 * Return the next zone of the same node as @zone, or NULL once the
 * node's last zone has been reached.
 */
static struct zone *next_pgdat_zone(struct zone *zone)
{
	struct zone *last = zone->zone_pgdat->node_zones + MAX_NR_ZONES - 1;

	return zone < last ? zone + 1 : NULL;
}
/*
 * Workqueue callback: zero every page currently on the buddy free lists
 * of one NUMA node, then drop this worker's reference on
 * clear_freelist_workers.
 *
 * Queued via queue_work_node() so it runs on a CPU of the target node
 * and the memory writes stay node-local. zone->lock is held across each
 * zone's free lists, which can take a long time on large nodes — see
 * the vm.txt note about possible RCU stall warnings; the NMI watchdog
 * is touched per block to avoid hard-lockup reports.
 *
 * NOTE: this file is built with KASAN_SANITIZE := n (see mm/Makefile):
 * the pages written here are free, so their shadow memory is poisoned
 * and KASAN would flag these intentional writes as use-after-free.
 */
static void clear_pgdat_freelist_pages(struct work_struct *work)
{
	struct pgdat_entry *entry = container_of(work, struct pgdat_entry, work);
	struct pglist_data *pgdat = entry->pgdat;
	unsigned long flags, order, t;
	struct page *page;
	struct zone *zone;

	for_each_populated_zone_pgdat(pgdat, zone) {
		spin_lock_irqsave(&zone->lock, flags);
		for_each_migratetype_order(order, t) {
			list_for_each_entry(page, &zone->free_area[order].free_list[t], lru) {
#ifdef CONFIG_KMAP_LOCAL
				int i;

				/* Clear highmem by clear_highpage() */
				for (i = 0; i < (1 << order); i++)
					clear_highpage(page + i);
#else
				/* No highmem: every page has a linear mapping. */
				memset(page_address(page), 0, (1 << order) * PAGE_SIZE);
#endif
				/* Long hold of zone->lock; keep the watchdog quiet. */
				touch_nmi_watchdog();
				atomic_add(1 << order, &clear_pages_num);
			}
		}
		spin_unlock_irqrestore(&zone->lock, flags);

		/* Lock dropped between zones: give others a chance to run. */
		cond_resched();
	}

	kfree(entry);

	/* Last worker out wakes the sysctl writer blocked in clear_freelist_pages(). */
	if (atomic_dec_and_test(&clear_freelist_workers))
		wake_up(&clear_freelist_wait);
}
static void init_clear_freelist_work(struct pglist_data *pgdat)
{
struct pgdat_entry *entry;
entry = kzalloc(sizeof(struct pgdat_entry), GFP_KERNEL);
if (!entry)
return;
entry->pgdat = pgdat;
INIT_WORK(&entry->work, clear_pgdat_freelist_pages);
queue_work_node(pgdat->node_id, system_unbound_wq, &entry->work);
}
/*
 * Zero the pages on the free lists of every online node.
 *
 * Serialized by clear_freelist_lock so concurrent sysctl writes do not
 * overlap. Per-CPU pagesets are drained back to the buddy lists first so
 * as many free pages as possible are visible to the workers. One work
 * item is queued per online node; clear_freelist_workers must be
 * incremented before the work is queued so the wait below cannot return
 * before all workers have finished.
 */
static void clear_freelist_pages(void)
{
	struct pglist_data *pgdat;

	mutex_lock(&clear_freelist_lock);
	drain_all_pages(NULL);

	for_each_online_pgdat(pgdat) {
		atomic_inc(&clear_freelist_workers);
		init_clear_freelist_work(pgdat);
	}

	/* Sleep until the last per-node worker drops its reference. */
	wait_event(clear_freelist_wait, atomic_read(&clear_freelist_workers) == 0);

	pr_debug("Cleared pages %d\nFree pages %lu\n", atomic_read(&clear_pages_num),
		 global_zone_page_state(NR_FREE_PAGES));
	atomic_set(&clear_pages_num, 0);

	mutex_unlock(&clear_freelist_lock);
}
/*
 * Handler for /proc/sys/vm/clear_freelist_pages.
 *
 * Parse into a stack variable through a local copy of the ctl_table so
 * the shared, file-scope table never points at on-stack data: the
 * previous code assigned table->data = &val directly, which races with
 * a concurrent writer and leaves a dangling pointer in the table after
 * this function returns. Any successful write (the min/max bounds
 * restrict the accepted value to exactly 1) triggers a full clear of
 * the free-list pages.
 */
static int sysctl_clear_freelist_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table dup_table = *table;
	int ret;
	int val = 0;

	dup_table.data = &val;
	ret = proc_dointvec_minmax(&dup_table, write, buffer, lenp, ppos);
	if (!ret && write)
		clear_freelist_pages();

	return ret;
}
/*
 * /proc/sys/vm/clear_freelist_pages: write-only (0200). extra1/extra2
 * both point at 1, so proc_dointvec_minmax() only accepts that value.
 * .data is left NULL; the handler substitutes its own int buffer.
 */
static struct ctl_table clear_freelist_table[] = {
	{
		.procname	= "clear_freelist_pages",
		.data		= NULL,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= &sysctl_clear_freelist_handler,
		.extra1		= &one,
		.extra2		= &one,
	},
	{ }
};
/* Parent directory entry hooking the table above under /proc/sys/vm/. */
static struct ctl_table sys_ctl_table[] = {
	{
		.procname	= "vm",
		.mode		= 0555,
		.child		= clear_freelist_table,
	},
	{ }
};
/*
 * Register the vm.clear_freelist_pages sysctl at init time.
 *
 * register_sysctl_table() returns NULL on failure; report -ENOMEM in
 * that case instead of silently claiming success with no sysctl
 * registered.
 */
static int __init clear_freelist_init(void)
{
	if (!register_sysctl_table(sys_ctl_table))
		return -ENOMEM;

	return 0;
}
module_init(clear_freelist_init);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册