提交 682fc25d 编写于 作者: L Liu Shixin

mm/swapfile: introduce per-memcg swapfile control

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7CGGT
CVE: NA

--------------------------------

With the memory.swapfile interface, the swap devices available to a memcg
can be limited. The accepted values are 'all', 'none', or the path of an
active swap device.
Usage:
	echo /dev/zram0 > memory.swapfile

If the selected swap device goes offline (swapoff), memory.swapfile falls back to 'none'.
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
上级 5361bef3
......@@ -80,6 +80,7 @@ Brief summary of control files.
memory.force_empty trigger forced page reclaim
memory.force_swapin trigger forced swapin anon page
memory.swap.max set/show limit for swap
memory.swapfile set/show available swap file
memory.pressure_level set memory pressure notifications
memory.swappiness set/show swappiness parameter of vmscan
(See sysctl's vm.swappiness)
......
......@@ -50,6 +50,11 @@ enum memcg_memory_event {
MEMCG_NR_MEMORY_EVENTS,
};
/*
 * Sentinel values stored in swap_device.type. Both are negative, whereas a
 * specific swap device is recorded as its non-negative index into
 * swap_info[].
 */
enum {
SWAP_TYPE_ALL = -1, /* the memcg is allowed to use every swap file */
SWAP_TYPE_NONE = -2, /* the memcg is prohibited from using any swap file */
};
struct mem_cgroup_reclaim_cookie {
pg_data_t *pgdat;
unsigned int generation;
......@@ -242,6 +247,7 @@ struct obj_cgroup {
/* Per-memcg swap settings, reached through memcg->swap_dev. */
struct swap_device {
/* Set to PAGE_COUNTER_MAX on reset/root init, otherwise copied from the parent. */
unsigned long max;
/* SWAP_TYPE_ALL, SWAP_TYPE_NONE, or the swap type index chosen via memory.swapfile. */
int type;
};
/*
......@@ -1307,6 +1313,9 @@ static inline bool memcg_has_children(struct mem_cgroup *memcg)
int mem_cgroup_force_empty(struct mem_cgroup *memcg);
int memcg_get_swap_type(struct page *page);
void memcg_remove_swapfile(int type);
#else /* CONFIG_MEMCG */
#define MEM_CGROUP_ID_SHIFT 0
......@@ -1714,6 +1723,15 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
static inline void memcg_print_bad_task(struct oom_control *oc)
{
}
/*
 * !CONFIG_MEMCG stub: without memory cgroups there is no per-memcg
 * restriction, so every page may be swapped to any swap file.
 */
static inline int memcg_get_swap_type(struct page *page)
{
	return SWAP_TYPE_ALL;
}
/*
 * !CONFIG_MEMCG stub: there is no per-memcg swap type to invalidate when a
 * swap device is removed, so this is a no-op.
 */
static inline void memcg_remove_swapfile(int type)
{
}
#endif /* CONFIG_MEMCG */
/* idx can be of type enum memcg_stat_item or node_stat_item */
......
......@@ -511,11 +511,14 @@ static inline long get_nr_swap_pages(void)
return atomic_long_read(&nr_swap_pages);
}
extern long get_nr_swap_pages_type(int type);
extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t get_swap_page(struct page *page);
extern void put_swap_page(struct page *page, swp_entry_t entry);
extern swp_entry_t get_swap_page_of_type(int);
extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size,
int type);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
......@@ -547,6 +550,11 @@ static inline void put_swap_device(struct swap_info_struct *si)
percpu_ref_put(&si->sei->users);
}
#ifdef CONFIG_MEMCG_SWAP_QOS
extern int write_swapfile_for_memcg(struct address_space *mapping,
int *swap_type);
extern void read_swapfile_for_memcg(struct seq_file *m, int type);
#endif
#else /* CONFIG_SWAP */
static inline int swap_readpage(struct page *page, bool do_poll)
......
......@@ -4065,8 +4065,10 @@ static void memcg_swap_qos_reset(void)
{
struct mem_cgroup *memcg;
for_each_mem_cgroup(memcg)
for_each_mem_cgroup(memcg) {
WRITE_ONCE(memcg->swap_dev->max, PAGE_COUNTER_MAX);
WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_ALL);
}
}
static int sysctl_memcg_swap_qos_handler(struct ctl_table *table, int write,
......@@ -4157,11 +4159,15 @@ static void memcg_free_swap_device(struct mem_cgroup *memcg)
static void memcg_swap_device_init(struct mem_cgroup *memcg,
struct mem_cgroup *parent)
{
if (!static_branch_likely(&memcg_swap_qos_key) || !parent)
if (!static_branch_likely(&memcg_swap_qos_key) || !parent) {
WRITE_ONCE(memcg->swap_dev->max, PAGE_COUNTER_MAX);
else
WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_ALL);
} else {
WRITE_ONCE(memcg->swap_dev->max,
READ_ONCE(parent->swap_dev->max));
WRITE_ONCE(memcg->swap_dev->type,
READ_ONCE(parent->swap_dev->type));
}
}
u64 memcg_swapmax_read(struct cgroup_subsys_state *css, struct cftype *cft)
......@@ -4235,6 +4241,121 @@ static int mem_cgroup_check_swap_for_v1(struct page *page, swp_entry_t entry)
return 0;
}
/*
 * seq_show handler for the memory.swapfile control file.
 *
 * Prints "all" when swap QoS is disabled or the memcg is unrestricted,
 * "none" when the memcg may not swap at all, and otherwise delegates to
 * read_swapfile_for_memcg() to print the selected swap device.
 *
 * Always returns 0.
 */
static int memcg_swapfile_read(struct seq_file *m, void *v)
{
	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
	int swap_type;

	/* With the static key off, the per-memcg setting is not consulted. */
	if (!static_branch_likely(&memcg_swap_qos_key)) {
		seq_printf(m, "all\n");
		return 0;
	}

	swap_type = READ_ONCE(memcg->swap_dev->type);
	switch (swap_type) {
	case SWAP_TYPE_NONE:
		seq_printf(m, "none\n");
		break;
	case SWAP_TYPE_ALL:
		seq_printf(m, "all\n");
		break;
	default:
		/* A specific swap type index: let swapfile.c resolve it. */
		read_swapfile_for_memcg(m, swap_type);
		break;
	}

	return 0;
}
/*
 * Write handler for the memory.swapfile control file.
 *
 * Accepted input (after stripping surrounding whitespace):
 *   "none" - store SWAP_TYPE_NONE
 *   "all"  - store SWAP_TYPE_ALL
 *   other  - treated as a path; the file is opened and its mapping is
 *            handed to write_swapfile_for_memcg(), which stores the
 *            matching swap type index or fails.
 *
 * Returns @nbytes on success, -EACCES when swap QoS is disabled, or the
 * error from getname_kernel(), file_open_name() or
 * write_swapfile_for_memcg().
 */
static ssize_t memcg_swapfile_write(struct kernfs_open_file *of, char *buf,
				    size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	struct filename *name;
	struct file *filp;
	ssize_t ret;

	if (!static_branch_likely(&memcg_swap_qos_key))
		return -EACCES;

	buf = strstrip(buf);

	/* The two keywords need no file lookup. */
	if (strcmp(buf, "none") == 0) {
		WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_NONE);
		return nbytes;
	}
	if (strcmp(buf, "all") == 0) {
		WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_ALL);
		return nbytes;
	}

	name = getname_kernel(buf);
	if (IS_ERR(name))
		return PTR_ERR(name);

	filp = file_open_name(name, O_RDWR|O_LARGEFILE, 0);
	if (IS_ERR(filp)) {
		ret = PTR_ERR(filp);
		goto out_putname;
	}

	/* The file is only needed to identify the swap device by mapping. */
	ret = write_swapfile_for_memcg(filp->f_mapping,
				       &memcg->swap_dev->type);
	filp_close(filp, NULL);
	if (ret >= 0)
		ret = nbytes;

out_putname:
	putname(name);
	return ret;
}
/*
 * Return the swap type a page is allowed to use, based on its memcg.
 *
 * SWAP_TYPE_ALL is returned when swap QoS is disabled, @page is NULL, the
 * page has no memcg, the memcg is the root, or an online reference to the
 * memcg cannot be taken. Otherwise the memcg's swap_dev->type is returned
 * (SWAP_TYPE_ALL, SWAP_TYPE_NONE or a swap type index).
 */
int memcg_get_swap_type(struct page *page)
{
	struct mem_cgroup *memcg;
	int type;

	if (!static_branch_likely(&memcg_swap_qos_key) || !page)
		return SWAP_TYPE_ALL;

	rcu_read_lock();
	memcg = page_memcg(page);
	if (memcg && !mem_cgroup_is_root(memcg) &&
	    css_tryget_online(&memcg->css)) {
		/* The css reference keeps memcg usable once RCU is dropped. */
		rcu_read_unlock();
		type = READ_ONCE(memcg->swap_dev->type);
		css_put(&memcg->css);
		return type;
	}
	rcu_read_unlock();

	return SWAP_TYPE_ALL;
}
void memcg_remove_swapfile(int type)
{
struct mem_cgroup *memcg;
if (!static_branch_likely(&memcg_swap_qos_key))
return;
for_each_mem_cgroup(memcg)
if (READ_ONCE(memcg->swap_dev->type) == type)
WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_NONE);
}
/*
 * Number of swap pages available to @memcg, honouring its swap type.
 *
 * With swap QoS disabled, or when the memcg may use all swap files, this is
 * mem_cgroup_get_nr_swap_pages(). A memcg set to SWAP_TYPE_NONE has 0 pages;
 * a memcg bound to one device gets that device's count from
 * get_nr_swap_pages_type().
 */
static long mem_cgroup_get_nr_swap_pages_type(struct mem_cgroup *memcg)
{
	if (static_branch_likely(&memcg_swap_qos_key)) {
		int type = READ_ONCE(memcg->swap_dev->type);

		if (type == SWAP_TYPE_NONE)
			return 0;
		if (type != SWAP_TYPE_ALL)
			return get_nr_swap_pages_type(type);
	}

	return mem_cgroup_get_nr_swap_pages(memcg);
}
#else
static int memcg_alloc_swap_device(struct mem_cgroup *memcg)
{
......@@ -4254,6 +4375,21 @@ static int mem_cgroup_check_swap_for_v1(struct page *page, swp_entry_t entry)
{
return 0;
}
/*
 * Fallback used in this #else branch (the matching #if is outside this
 * view): no per-memcg restriction, every page may use all swap files.
 */
int memcg_get_swap_type(struct page *page)
{
	return SWAP_TYPE_ALL;
}
/*
 * Fallback used in this #else branch: no memcg records a swap type, so
 * removing a swap device requires no memcg update.
 */
void memcg_remove_swapfile(int type)
{
}
/*
 * Fallback used in this #else branch: with no per-memcg swap type, the
 * result is simply mem_cgroup_get_nr_swap_pages().
 */
static long mem_cgroup_get_nr_swap_pages_type(struct mem_cgroup *memcg)
{
	return mem_cgroup_get_nr_swap_pages(memcg);
}
#endif
#ifdef CONFIG_NUMA
......@@ -5523,7 +5659,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
/* If only reclaim swap pages, check swap space at first. */
if ((reclaim_options & MEMCG_RECLAIM_NOT_FILE) &&
(mem_cgroup_get_nr_swap_pages(memcg) <= 0))
(mem_cgroup_get_nr_swap_pages_type(memcg) <= 0))
return -EAGAIN;
/* This is the final attempt, drain percpu lru caches in the
......@@ -5960,6 +6096,12 @@ static struct cftype mem_cgroup_legacy_files[] = {
.write = memcg_swapmax_write,
.read_u64 = memcg_swapmax_read,
},
{
.name = "swapfile",
.flags = CFTYPE_NOT_ON_ROOT,
.write = memcg_swapfile_write,
.seq_show = memcg_swapfile_read,
},
#endif
{
.name = "high_async_ratio",
......
......@@ -266,7 +266,7 @@ static int refill_swap_slots_cache(struct swap_slots_cache *cache)
cache->cur = 0;
if (swap_slot_cache_active)
cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
cache->slots, 1);
cache->slots, 1, SWAP_TYPE_ALL);
return cache->nr;
}
......@@ -307,12 +307,17 @@ swp_entry_t get_swap_page(struct page *page)
{
swp_entry_t entry;
struct swap_slots_cache *cache;
int type;
entry.val = 0;
type = memcg_get_swap_type(page);
if (type == SWAP_TYPE_NONE)
goto out;
if (PageTransHuge(page)) {
if (IS_ENABLED(CONFIG_THP_SWAP))
get_swap_pages(1, &entry, HPAGE_PMD_NR);
get_swap_pages(1, &entry, HPAGE_PMD_NR, type);
goto out;
}
......@@ -327,7 +332,8 @@ swp_entry_t get_swap_page(struct page *page)
*/
cache = raw_cpu_ptr(&swp_slots);
if (likely(check_cache_active() && cache->slots)) {
if (likely(check_cache_active() && cache->slots) &&
type == SWAP_TYPE_ALL) {
mutex_lock(&cache->alloc_lock);
if (cache->slots) {
repeat:
......@@ -344,7 +350,7 @@ swp_entry_t get_swap_page(struct page *page)
goto out;
}
get_swap_pages(1, &entry, 1);
get_swap_pages(1, &entry, 1, type);
out:
if (mem_cgroup_try_charge_swap(page, entry)) {
put_swap_page(page, entry);
......
......@@ -1056,7 +1056,97 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
}
int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
#ifdef CONFIG_MEMCG_SWAP_QOS
/*
 * Find the active swap device backed by @mapping and store its type index
 * in *@swap_type.
 *
 * The swap_info[] table is scanned under swap_lock; only entries with
 * SWP_WRITEOK set are considered.
 *
 * Returns 0 when a match was found and stored, -EINVAL otherwise (in which
 * case *@swap_type is left untouched).
 */
int write_swapfile_for_memcg(struct address_space *mapping, int *swap_type)
{
	unsigned int i;
	int err = -EINVAL;

	spin_lock(&swap_lock);
	for (i = 0; i < nr_swapfiles; i++) {
		struct swap_info_struct *si = swap_info[i];

		if (!(si->flags & SWP_WRITEOK))
			continue;
		if (si->swap_file->f_mapping != mapping)
			continue;

		WRITE_ONCE(*swap_type, i);
		err = 0;
		break;
	}
	spin_unlock(&swap_lock);

	return err;
}
/*
 * Print the path of swap device @type, followed by a newline, into @m.
 *
 * The lookup is done under swap_lock. Nothing is printed when @type is not
 * a valid index into swap_info[] or the device does not have SWP_WRITEOK
 * set.
 */
void read_swapfile_for_memcg(struct seq_file *m, int type)
{
	struct swap_info_struct *si;

	spin_lock(&swap_lock);
	/*
	 * Reject negative values (e.g. the SWAP_TYPE_* sentinels) explicitly
	 * rather than relying on the implicit signed/unsigned conversion in
	 * the comparison with nr_swapfiles.
	 */
	if (type >= 0 && (unsigned int)type < nr_swapfiles) {
		si = swap_info[type];
		if (si->flags & SWP_WRITEOK) {
			seq_file_path(m, si->swap_file, "\t\n\\");
			seq_printf(m, "\n");
		}
	}
	spin_unlock(&swap_lock);
}
/*
 * Return the number of free pages on swap device @type.
 *
 * Computed under swap_lock as si->pages - si->inuse_pages. Returns 0 when
 * @type is not a valid index into swap_info[] or the device does not have
 * SWP_WRITEOK set.
 */
long get_nr_swap_pages_type(int type)
{
	struct swap_info_struct *si;
	long nr_swap_pages = 0;

	spin_lock(&swap_lock);
	/*
	 * Reject negative values (e.g. the SWAP_TYPE_* sentinels) explicitly
	 * rather than relying on the implicit signed/unsigned conversion in
	 * the comparison with nr_swapfiles.
	 */
	if (type >= 0 && (unsigned int)type < nr_swapfiles) {
		si = swap_info[type];
		if (si->flags & SWP_WRITEOK)
			nr_swap_pages = si->pages - si->inuse_pages;
	}
	spin_unlock(&swap_lock);

	return nr_swap_pages;
}
/*
 * Number of allocatable swap entries of @size pages for swap type @type.
 *
 * For SWAP_TYPE_ALL this is the global nr_swap_pages divided by @size.
 * For a specific type, swap_avail_lock is released around the call to
 * get_nr_swap_pages_type() (which takes swap_lock) and re-acquired before
 * returning, so the caller must hold swap_avail_lock on entry and still
 * holds it on return.
 * NOTE(review): the lock is presumably dropped to avoid nesting swap_lock
 * inside swap_avail_lock - confirm against the locking rules.
 */
static long get_avail_pages(unsigned long size, int type)
{
	if (type != SWAP_TYPE_ALL) {
		long avail;

		spin_unlock(&swap_avail_lock);
		avail = get_nr_swap_pages_type(type) / size;
		spin_lock(&swap_avail_lock);

		return avail;
	}

	return atomic_long_read(&nr_swap_pages) / size;
}
/*
 * Decide whether a swap device must be skipped for an allocation.
 *
 * @swap_type: type index of the candidate swap device
 * @type:      type requested by the caller
 *
 * Returns true only when the caller asked for a specific type that differs
 * from the candidate; SWAP_TYPE_ALL never skips.
 */
static inline bool should_skip_swap_type(int swap_type, int type)
{
	return type != SWAP_TYPE_ALL && type != swap_type;
}
#else
/*
 * !CONFIG_MEMCG_SWAP_QOS stub: per-type accounting is not available, so
 * every type reports 0 free pages.
 */
long get_nr_swap_pages_type(int type)
{
	return 0;
}
/*
 * !CONFIG_MEMCG_SWAP_QOS stub: @type is ignored and the global
 * nr_swap_pages counter, divided by @size, is returned.
 */
static inline long get_avail_pages(unsigned long size, int type)
{
	return atomic_long_read(&nr_swap_pages) / size;
}
/*
 * !CONFIG_MEMCG_SWAP_QOS stub: no swap device is ever skipped on account
 * of the requested type.
 */
static inline bool should_skip_swap_type(int swap_type, int type)
{
	return false;
}
#endif
int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size,
int type)
{
unsigned long size = swap_entry_size(entry_size);
struct swap_info_struct *si, *next;
......@@ -1069,7 +1159,7 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
spin_lock(&swap_avail_lock);
avail_pgs = atomic_long_read(&nr_swap_pages) / size;
avail_pgs = get_avail_pages(size, type);
if (avail_pgs <= 0) {
spin_unlock(&swap_avail_lock);
goto noswap;
......@@ -1086,6 +1176,11 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
spin_unlock(&swap_avail_lock);
spin_lock(&si->lock);
if (should_skip_swap_type(si->type, type)) {
spin_unlock(&si->lock);
spin_lock(&swap_avail_lock);
goto nextsi;
}
if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
spin_lock(&swap_avail_lock);
if (plist_node_empty(&si->avail_lists[node])) {
......@@ -2703,6 +2798,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
cluster_info = p->cluster_info;
p->cluster_info = NULL;
frontswap_map = frontswap_map_get(p);
memcg_remove_swapfile(p->type);
spin_unlock(&p->lock);
spin_unlock(&swap_lock);
arch_swap_invalidate_area(p->type);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册