Commit abfd8691 authored by liubo, committed by Zheng Zengkai

etmem: add swapcache reclaim to etmem

euleros inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DC4A
CVE: NA

-------------------------------------------------

etmem is the memory vertical expansion technology.

In the current etmem process, memory page swapping is implemented by
invoking shrink_page_list. When this interface is invoked for the
first time, pages are added to the swap cache and written to disk.
A swap cache page is reclaimed only when this interface is invoked a
second time and no process has accessed the page in the meantime.
However, in the etmem process, user mode scans for pages that have
been accessed, and no migration request is issued for pages that
processes do not access. Therefore, the swap cache may stay occupied
indefinitely.
To solve this problem, add logic for actively reclaiming the swap
cache. When the swap cache occupies a large amount of memory, the
system proactively scans the LRU lists and reclaims swap cache pages
to keep memory usage within the specified range.
(A userspace usage sketch of the new ioctl interface follows the
commit metadata below.)
Signed-off-by: liubo <liubo254@huawei.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Parent 79c68ab3
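For illustration, here is a minimal userspace sketch of how the new ioctl
interface added by this patch might be driven. The proc path
/proc/<pid>/swap_pages is an assumption for this example; the command
encodings mirror the _IOW() definitions in the patch.

/* swapcache_reclaim_example.c - illustrative sketch, not part of the patch */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>

#define RECLAIM_SWAPCACHE_MAGIC 0x77
#define SET_SWAPCACHE_WMARK   _IOW(RECLAIM_SWAPCACHE_MAGIC, 0x02, unsigned int)
#define RECLAIM_SWAPCACHE_ON  _IOW(RECLAIM_SWAPCACHE_MAGIC, 0x01, unsigned int)
#define RECLAIM_SWAPCACHE_OFF _IOW(RECLAIM_SWAPCACHE_MAGIC, 0x00, unsigned int)

int main(void)
{
	/* assumed path of the etmem swap interface for some target pid */
	int fd = open("/proc/1/swap_pages", O_RDWR);
	unsigned int ratio;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * Per get_swapcache_watermark() in the patch, the low byte is the
	 * low watermark percent and the next byte is the high watermark
	 * percent of totalram_pages(): here low = 10%, high = 30%.
	 */
	ratio = (30 << 8) | 10;
	if (ioctl(fd, SET_SWAPCACHE_WMARK, &ratio) < 0)
		perror("SET_SWAPCACHE_WMARK");

	/* wake the background kthread to reclaim swapcache pages */
	if (ioctl(fd, RECLAIM_SWAPCACHE_ON, 0) < 0)
		perror("RECLAIM_SWAPCACHE_ON");

	close(fd);
	return 0;
}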
@@ -10,6 +10,24 @@
#include <linux/mempolicy.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/numa.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/mm_inline.h>
#define RECLAIM_SWAPCACHE_MAGIC 0x77
#define SET_SWAPCACHE_WMARK _IOW(RECLAIM_SWAPCACHE_MAGIC, 0x02, unsigned int)
#define RECLAIM_SWAPCACHE_ON _IOW(RECLAIM_SWAPCACHE_MAGIC, 0x01, unsigned int)
#define RECLAIM_SWAPCACHE_OFF _IOW(RECLAIM_SWAPCACHE_MAGIC, 0x00, unsigned int)
#define WATERMARK_MAX 100
#define SWAP_SCAN_NUM_MAX 32
static struct task_struct *reclaim_swapcache_tk;
static bool enable_swapcache_reclaim;
static unsigned long swapcache_watermark[ETMEM_SWAPCACHE_NR_WMARK];
static DECLARE_WAIT_QUEUE_HEAD(reclaim_queue);
static ssize_t swap_pages_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
@@ -45,7 +63,7 @@ static ssize_t swap_pages_write(struct file *file, const char __user *buf,
ret = kstrtoul(p, 16, &vaddr);
if (ret != 0)
continue;
/* If get page struct failed, ignore it, get next page */
page = get_page_from_vaddr(mm, vaddr);
if (!page)
continue;
@@ -78,9 +96,153 @@ static int swap_pages_release(struct inode *inode, struct file *file)
return 0;
}
/* check if the swapcache meets the requirement */
static bool swapcache_balanced(void)
{
return total_swapcache_pages() < swapcache_watermark[ETMEM_SWAPCACHE_WMARK_HIGH];
}
/* the flag indicates whether swapcache reclaim has started */
static bool swapcache_reclaim_enabled(void)
{
return READ_ONCE(enable_swapcache_reclaim);
}
static void start_swapcache_reclaim(void)
{
if (swapcache_balanced())
return;
/* RECLAIM_SWAPCACHE_ON triggers the thread to start running. */
if (!waitqueue_active(&reclaim_queue))
return;
WRITE_ONCE(enable_swapcache_reclaim, true);
wake_up_interruptible(&reclaim_queue);
}
static void stop_swapcache_reclaim(void)
{
WRITE_ONCE(enable_swapcache_reclaim, false);
}
static bool should_goto_sleep(void)
{
if (swapcache_balanced())
stop_swapcache_reclaim();
if (swapcache_reclaim_enabled())
return false;
return true;
}
static int get_swapcache_watermark(unsigned int ratio)
{
unsigned int low_watermark;
unsigned int high_watermark;
low_watermark = ratio & 0xFF;
high_watermark = (ratio >> 8) & 0xFF;
if (low_watermark > WATERMARK_MAX ||
high_watermark > WATERMARK_MAX ||
low_watermark > high_watermark)
return -EPERM;
swapcache_watermark[ETMEM_SWAPCACHE_WMARK_LOW] = totalram_pages() *
low_watermark / WATERMARK_MAX;
swapcache_watermark[ETMEM_SWAPCACHE_WMARK_HIGH] = totalram_pages() *
high_watermark / WATERMARK_MAX;
return 0;
}
extern struct file_operations proc_swap_pages_operations;
static void reclaim_swapcache_try_to_sleep(void)
{
DEFINE_WAIT(wait);
if (freezing(current) || kthread_should_stop())
return;
prepare_to_wait(&reclaim_queue, &wait, TASK_INTERRUPTIBLE);
if (should_goto_sleep()) {
if (!kthread_should_stop())
schedule();
}
finish_wait(&reclaim_queue, &wait);
}
static void etmem_reclaim_swapcache(void)
{
do_swapcache_reclaim(swapcache_watermark,
ARRAY_SIZE(swapcache_watermark));
stop_swapcache_reclaim();
}
static int reclaim_swapcache_proactive(void *para)
{
set_freezable();
while (1) {
bool ret;
reclaim_swapcache_try_to_sleep();
ret = try_to_freeze();
if (kthread_should_stop())
break;
if (ret)
continue;
etmem_reclaim_swapcache();
}
return 0;
}
static int reclaim_swapcache_run(void)
{
int ret = 0;
reclaim_swapcache_tk = kthread_run(reclaim_swapcache_proactive, NULL,
"etmem_recalim_swapcache");
if (IS_ERR(reclaim_swapcache_tk)) {
ret = PTR_ERR(reclaim_swapcache_tk);
reclaim_swapcache_tk = NULL;
}
return ret;
}
static long swap_page_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
void __user *argp = (void __user *)arg;
unsigned int ratio;
switch (cmd) {
case RECLAIM_SWAPCACHE_ON:
if (swapcache_reclaim_enabled())
return 0;
start_swapcache_reclaim();
break;
case RECLAIM_SWAPCACHE_OFF:
stop_swapcache_reclaim();
break;
case SET_SWAPCACHE_WMARK:
if (get_user(ratio, (unsigned int __user *)argp))
return -EFAULT;
if (get_swapcache_watermark(ratio) != 0)
return -EFAULT;
break;
default:
return -EPERM;
}
return 0;
}
static int swap_pages_entry(void)
{
proc_swap_pages_operations.flock(NULL, 1, NULL);
@@ -88,8 +250,12 @@ static int swap_pages_entry(void)
proc_swap_pages_operations.write = swap_pages_write;
proc_swap_pages_operations.open = swap_pages_open;
proc_swap_pages_operations.release = swap_pages_release;
proc_swap_pages_operations.unlocked_ioctl = swap_page_ioctl;
proc_swap_pages_operations.flock(NULL, 0, NULL);
enable_swapcache_reclaim = false;
reclaim_swapcache_run();
return 0;
}
@@ -100,7 +266,14 @@ static void swap_pages_exit(void)
proc_swap_pages_operations.write = NULL;
proc_swap_pages_operations.open = NULL;
proc_swap_pages_operations.release = NULL;
proc_swap_pages_operations.unlocked_ioctl = NULL;
proc_swap_pages_operations.flock(NULL, 0, NULL);
if (!IS_ERR(reclaim_swapcache_tk)) {
kthread_stop(reclaim_swapcache_tk);
reclaim_swapcache_tk = NULL;
}
return;
}
MODULE_LICENSE("GPL");
......
@@ -2032,11 +2032,19 @@ static int mm_swap_release(struct inode *inode, struct file *file)
return ret;
}
static long mm_swap_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
if (proc_swap_pages_operations.unlocked_ioctl)
return proc_swap_pages_operations.unlocked_ioctl(filp, cmd, arg);
return 0;
}
const struct file_operations proc_mm_swap_operations = {
.llseek = mem_lseek,
.write = mm_swap_write,
.open = mm_swap_open,
.release = mm_swap_release,
.unlocked_ioctl = mm_swap_ioctl,
};
#endif
#endif /* CONFIG_PROC_PAGE_MONITOR */
......
@@ -764,6 +764,23 @@ static inline void list_splice_tail_init(struct list_head *list,
!list_entry_is_head(pos, head, member); \
pos = n, n = list_prev_entry(n, member))
/**
* list_for_each_entry_safe_reverse_from - iterate backwards over list from
* current point safe against removal
* @pos: the type * to use as a loop cursor.
* @n: another type * to use as temporary storage
* @head: the head for your list.
* @member: the name of the list_head within the struct.
*
* Iterate backwards over list of given type from current point, safe against
* removal of list entry.
*/
#define list_for_each_entry_safe_reverse_from(pos, n, head, member) \
for (n = list_prev_entry(pos, member); \
!list_entry_is_head(pos, head, member); \
pos = n, n = list_prev_entry(n, member))
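For illustration, a minimal sketch of how the new macro is meant to be used,
assuming a hypothetical struct item; it mirrors the resume-from-saved-cursor
pattern that do_swapcache_reclaim() relies on later in this patch.

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/types.h>

/* hypothetical element type, for illustration only */
struct item {
	struct list_head lru;
	bool keep;
};

/*
 * Resume a backwards walk from a cursor saved by an earlier partial
 * scan. The walk starts at pos itself and is safe against deleting
 * the current entry because n already points at its predecessor.
 */
static void prune_from(struct item *pos, struct list_head *head)
{
	struct item *n;

	list_for_each_entry_safe_reverse_from(pos, n, head, lru) {
		if (!pos->keep) {
			list_del(&pos->lru);
			kfree(pos);
		}
	}
}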
/**
* list_safe_reset_next - reset a stale list_for_each_entry_safe loop
* @pos: the loop cursor used in the list_for_each_entry_safe loop
......
@@ -385,9 +385,40 @@ extern int remove_mapping(struct address_space *mapping, struct page *page);
extern unsigned long reclaim_pages(struct list_head *page_list);
#ifdef CONFIG_ETMEM
enum etmem_swapcache_watermark_en {
ETMEM_SWAPCACHE_WMARK_LOW,
ETMEM_SWAPCACHE_WMARK_HIGH,
ETMEM_SWAPCACHE_NR_WMARK
};
extern int add_page_for_swap(struct page *page, struct list_head *pagelist);
extern struct page *get_page_from_vaddr(struct mm_struct *mm,
unsigned long vaddr);
extern int do_swapcache_reclaim(unsigned long *swapcache_watermark,
unsigned int watermark_nr);
extern bool kernel_swap_enabled(void);
#else
static inline int add_page_for_swap(struct page *page, struct list_head *pagelist)
{
return 0;
}
static inline struct page *get_page_from_vaddr(struct mm_struct *mm,
unsigned long vaddr)
{
return NULL;
}
static inline int do_swapcache_reclaim(unsigned long *swapcache_watermark,
unsigned int watermark_nr)
{
return 0;
}
static inline bool kernel_swap_enabled(void)
{
return true;
}
#endif
#ifdef CONFIG_NUMA
extern int node_reclaim_mode;
@@ -730,9 +761,5 @@ static inline bool mem_cgroup_swap_full(struct page *page)
}
#endif
#ifdef CONFIG_ETMEM
extern bool kernel_swap_enabled(void);
#endif
#endif /* __KERNEL__*/
#endif /* _LINUX_SWAP_H */
@@ -96,6 +96,7 @@ unsigned long total_swapcache_pages(void)
}
return ret;
}
EXPORT_SYMBOL_GPL(total_swapcache_pages);
static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
......
@@ -4572,7 +4572,7 @@ int add_page_for_swap(struct page *page, struct list_head *pagelist)
int err = -EBUSY;
struct page *head;
/* If the page is mapped by more than one process, do not swap it */
if (page_mapcount(page) > 1)
return -EACCES;
@@ -4621,6 +4621,316 @@ struct page *get_page_from_vaddr(struct mm_struct *mm, unsigned long vaddr)
return page;
}
EXPORT_SYMBOL_GPL(get_page_from_vaddr);
static int add_page_for_reclaim_swapcache(struct page *page,
struct list_head *pagelist, struct lruvec *lruvec, enum lru_list lru)
{
struct page *head;
/* If the page is mapped by more than one process, do not swap it */
if (page_mapcount(page) > 1)
return -EACCES;
if (PageHuge(page))
return -EACCES;
head = compound_head(page);
switch (__isolate_lru_page_prepare(head, 0)) {
case 0:
if (unlikely(!get_page_unless_zero(page)))
return -1;
if (!TestClearPageLRU(page)) {
/*
* This page may in other isolation path,
* but we still hold lru_lock.
*/
put_page(page);
return -1;
}
list_move(&head->lru, pagelist);
update_lru_size(lruvec, lru, page_zonenum(head), -thp_nr_pages(head));
break;
case -EBUSY:
return -1;
default:
break;
}
return 0;
}
static unsigned long reclaim_swapcache_pages_from_list(int nid,
struct list_head *page_list, unsigned long reclaim_num, bool putback_flag)
{
struct scan_control sc = {
.may_unmap = 1,
.may_swap = 1,
.may_writepage = 1,
.gfp_mask = GFP_KERNEL,
};
unsigned long nr_reclaimed = 0;
unsigned long nr_moved = 0;
struct page *page, *next;
LIST_HEAD(swap_pages);
struct pglist_data *pgdat = NULL;
struct reclaim_stat stat;
pgdat = NODE_DATA(nid);
if (putback_flag)
goto putback_list;
if (reclaim_num == 0)
return 0;
list_for_each_entry_safe(page, next, page_list, lru) {
if (!page_is_file_lru(page) && !__PageMovable(page)
&& PageSwapCache(page)) {
ClearPageActive(page);
list_move(&page->lru, &swap_pages);
nr_moved++;
}
if (nr_moved >= reclaim_num)
break;
}
/* swap the pages */
if (pgdat)
nr_reclaimed = shrink_page_list(&swap_pages,
pgdat,
&sc,
&stat, true);
while (!list_empty(&swap_pages)) {
page = lru_to_page(&swap_pages);
list_del(&page->lru);
putback_lru_page(page);
}
return nr_reclaimed;
putback_list:
while (!list_empty(page_list)) {
page = lru_to_page(page_list);
list_del(&page->lru);
putback_lru_page(page);
}
return nr_reclaimed;
}
#define SWAP_SCAN_NUM_MAX 32
static bool swapcache_below_watermark(unsigned long *swapcache_watermark)
{
return total_swapcache_pages() < swapcache_watermark[ETMEM_SWAPCACHE_WMARK_LOW];
}
static unsigned long get_swapcache_reclaim_num(unsigned long *swapcache_watermark)
{
return total_swapcache_pages() >
swapcache_watermark[ETMEM_SWAPCACHE_WMARK_LOW] ?
(total_swapcache_pages() - swapcache_watermark[ETMEM_SWAPCACHE_WMARK_LOW]) : 0;
}
/*
* The main function to reclaim swapcache; the whole reclaim process is
* divided into 3 steps:
* 1. get the total_swapcache_pages number to reclaim.
* 2. scan the LRU list of each memory node to collect the
* swapcache pages that can be reclaimed.
* 3. reclaim the swapcache pages until the requirements are met.
*/
int do_swapcache_reclaim(unsigned long *swapcache_watermark,
unsigned int watermark_nr)
{
int err = -EINVAL;
unsigned long swapcache_to_reclaim = 0;
unsigned long nr_reclaimed = 0;
unsigned long swapcache_total_reclaimable = 0;
unsigned long reclaim_page_count = 0;
unsigned long *nr = NULL;
unsigned long *nr_to_reclaim = NULL;
struct list_head *swapcache_list = NULL;
int nid = 0;
struct lruvec *lruvec = NULL;
struct list_head *src = NULL;
struct page *page = NULL;
struct page *next = NULL;
struct page *pos = NULL;
struct mem_cgroup *memcg = NULL;
struct mem_cgroup *target_memcg = NULL;
pg_data_t *pgdat = NULL;
unsigned int scan_count = 0;
int nid_num = 0;
if (swapcache_watermark == NULL ||
watermark_nr < ETMEM_SWAPCACHE_NR_WMARK)
return err;
/* get the total_swapcache_pages num to reclaim. */
swapcache_to_reclaim = get_swapcache_reclaim_num(swapcache_watermark);
if (swapcache_to_reclaim <= 0)
return err;
nr = kcalloc(MAX_NUMNODES, sizeof(unsigned long), GFP_KERNEL);
if (nr == NULL)
return -ENOMEM;
nr_to_reclaim = kcalloc(MAX_NUMNODES, sizeof(unsigned long), GFP_KERNEL);
if (nr_to_reclaim == NULL) {
kfree(nr);
return -ENOMEM;
}
swapcache_list = kcalloc(MAX_NUMNODES, sizeof(struct list_head), GFP_KERNEL);
if (swapcache_list == NULL) {
kfree(nr);
kfree(nr_to_reclaim);
return -ENOMEM;
}
/*
* scan the LRU list of each memory node to collect the
* swapcache pages that can be reclaimed.
*/
for_each_node_state(nid, N_MEMORY) {
INIT_LIST_HEAD(&swapcache_list[nid_num]);
cond_resched();
pgdat = NODE_DATA(nid);
memcg = mem_cgroup_iter(target_memcg, NULL, NULL);
do {
cond_resched();
pos = NULL;
lruvec = mem_cgroup_lruvec(memcg, pgdat);
src = &(lruvec->lists[LRU_INACTIVE_ANON]);
spin_lock_irq(&lruvec->lru_lock);
scan_count = 0;
/*
* Scan the swapcache pages that are not mapped from
* the end of the LRU linked list, scan SWAP_SCAN_NUM_MAX
* pages each time, and record the scan end point page.
*/
pos = list_last_entry(src, struct page, lru);
spin_unlock_irq(&lruvec->lru_lock);
do_scan:
cond_resched();
scan_count = 0;
spin_lock_irq(&lruvec->lru_lock);
/*
* check whether the pos page has been released from the LRU list;
* if so, cancel the subsequent page scanning of the current node.
*/
if (!pos || list_entry_is_head(pos, src, lru)) {
spin_unlock_irq(&lruvec->lru_lock);
continue;
}
if (!PageLRU(pos) || page_lru(pos) != LRU_INACTIVE_ANON) {
spin_unlock_irq(&lruvec->lru_lock);
continue;
}
page = pos;
pos = NULL;
/* Continue to scan down from the last scan breakpoint */
list_for_each_entry_safe_reverse_from(page, next, src, lru) {
scan_count++;
pos = next;
if (scan_count >= SWAP_SCAN_NUM_MAX)
break;
if (!PageSwapCache(page))
continue;
if (page_mapped(page))
continue;
if (add_page_for_reclaim_swapcache(page,
&swapcache_list[nid_num],
lruvec, LRU_INACTIVE_ANON) != 0)
continue;
nr[nid_num]++;
swapcache_total_reclaimable++;
}
spin_unlock_irq(&lruvec->lru_lock);
/*
* Check whether the scanned pages meet
* the reclaim requirements.
*/
if (swapcache_total_reclaimable <= swapcache_to_reclaim ||
scan_count >= SWAP_SCAN_NUM_MAX)
goto do_scan;
} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
/* Move on to scanning the next memory node. */
nid_num++;
}
/* reclaim the swapcache pages until the requirements are met. */
do {
nid_num = 0;
reclaim_page_count = 0;
/* start swapcache page reclaim for each node. */
for_each_node_state(nid, N_MEMORY) {
cond_resched();
nr_to_reclaim[nid_num] = (swapcache_to_reclaim /
(swapcache_total_reclaimable / nr[nid_num]));
reclaim_page_count += reclaim_swapcache_pages_from_list(nid,
&swapcache_list[nid_num],
nr_to_reclaim[nid_num], false);
nid_num++;
}
nr_reclaimed += reclaim_page_count;
/*
* Stop reclaiming if the swapcache reclaim requirement is met or
* no swapcache page was reclaimed in this round.
*/
if (nr_reclaimed >= swapcache_to_reclaim || reclaim_page_count == 0)
goto exit;
} while (!swapcache_below_watermark(swapcache_watermark) ||
nr_reclaimed < swapcache_to_reclaim);
exit:
nid_num = 0;
/*
* Put the swapcache pages that were not reclaimed back
* on the LRU list.
*/
for_each_node_state(nid, N_MEMORY) {
cond_resched();
reclaim_swapcache_pages_from_list(nid,
&swapcache_list[nid_num], 0, true);
nid_num++;
}
kfree(nr);
kfree(nr_to_reclaim);
kfree(swapcache_list);
return 0;
}
EXPORT_SYMBOL_GPL(do_swapcache_reclaim);
#endif
#ifdef CONFIG_PAGE_CACHE_LIMIT
......