Commit 892970b7 authored by Xu Yu

alinux: mm, memcg: add memsli procfs switch interface

to #26424368

Since memsli also records latency histograms for swapout and swapin,
which are NOT in the slow memory path, the overhead of memsli can be
non-negligible in some specific scenarios.

For example, in scenarios with frequent swapout and swapin, memsli
can introduce an overhead of ~1% of the total run time of a synthetic
test case.

This adds a procfs interface for the memsli switch. The memsli feature
is enabled by default, and you can now disable it with:

$ echo 0 > /proc/memsli/enabled

Likewise, you can check the current memsli switch status with:

$ cat /proc/memsli/enabled

Note that disabling memsli at runtime will NOT clear the existing
latency histograms. You still need to reset the specified latency
histogram(s) manually by echoing 0 into the corresponding cgroup
control file(s).
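
For illustration, a full round trip might look as follows; the
histogram file name memory.direct_swapin_latency is only an assumed
example of a per-memcg latency control file, which this patch does
not name:

$ cat /proc/memsli/enabled
1
$ echo 0 > /proc/memsli/enabled
$ cat /proc/memsli/enabled
0
$ echo 0 > /sys/fs/cgroup/memory/foo/memory.direct_swapin_latency
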
Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
Reviewed-by: Yang Shi <yang.shi@linux.alibaba.com>
Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
Parent 77663a9d
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -937,6 +937,8 @@ void memcg_check_wmark_min_adj(struct task_struct *curr,
 				   struct alloc_context *ac);
 extern void memcg_lat_stat_update(enum mem_lat_stat_item sidx, u64 duration);
+extern void memcg_lat_stat_start(u64 *start);
+extern u64 memcg_lat_stat_end(u64 start);

 #else /* CONFIG_MEMCG */

 #define MEM_CGROUP_ID_SHIFT	0
@@ -1292,6 +1294,15 @@ static inline void memcg_lat_stat_update(enum mem_lat_stat_item sidx,
 					 u64 duration)
 {
 }
+
+static inline void memcg_lat_stat_start(u64 *start)
+{
+}
+
+static inline u64 memcg_lat_stat_end(u64 start)
+{
+	return 0;
+}
 #endif /* CONFIG_MEMCG */

 /* idx can be of type enum memcg_stat_item or node_stat_item */
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -71,6 +71,7 @@
 #include <net/sock.h>
 #include <net/ip.h>
 #include "slab.h"
+#include <linux/proc_fs.h>

 #include <linux/uaccess.h>
@@ -89,6 +90,9 @@ static bool cgroup_memory_nosocket;
 /* Kernel memory accounting disabled? */
 static bool cgroup_memory_nokmem;
+
+/* Cgroup memory SLI disabled? */
+static DEFINE_STATIC_KEY_FALSE(cgroup_memory_nosli);

 /* Whether the swap controller is active */
 #ifdef CONFIG_MEMCG_SWAP
 int do_swap_account __read_mostly;
@@ -2461,7 +2465,7 @@ void mem_cgroup_handle_over_high(void)
 		return;

 	memcg = get_mem_cgroup_from_mm(current->mm);
-	start = ktime_get_ns();
+	memcg_lat_stat_start(&start);
 	reclaim_high(memcg, nr_pages, GFP_KERNEL);
 	current->memcg_nr_pages_over_high = 0;
@@ -2532,7 +2536,7 @@ void mem_cgroup_handle_over_high(void)
 out:
 	memcg_lat_stat_update(MEM_LAT_MEMCG_DIRECT_RECLAIM,
-			      (ktime_get_ns() - start));
+			      memcg_lat_stat_end(start));
 	css_put(&memcg->css);
 }
@@ -2608,11 +2612,11 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	memcg_memory_event(mem_over_limit, MEMCG_MAX);

-	start = ktime_get_ns();
+	memcg_lat_stat_start(&start);
 	nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
 						    gfp_mask, may_swap);
 	memcg_lat_stat_update(MEM_LAT_MEMCG_DIRECT_RECLAIM,
-			      (ktime_get_ns() - start));
+			      memcg_lat_stat_end(start));

 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		goto retry;
@@ -4627,6 +4631,9 @@ void memcg_lat_stat_update(enum mem_lat_stat_item sidx, u64 duration)
 	struct mem_cgroup *memcg, *iter;
 	enum mem_lat_count_t cidx;

+	if (static_branch_unlikely(&cgroup_memory_nosli))
+		return;
+
 	if (mem_cgroup_disabled())
 		return;
@@ -4641,6 +4648,22 @@ void memcg_lat_stat_update(enum mem_lat_stat_item sidx, u64 duration)
 	css_put(&memcg->css);
 }

+void memcg_lat_stat_start(u64 *start)
+{
+	if (!static_branch_unlikely(&cgroup_memory_nosli) &&
+	    !mem_cgroup_disabled())
+		*start = ktime_get_ns();
+}
+
+u64 memcg_lat_stat_end(u64 start)
+{
+	if (!static_branch_unlikely(&cgroup_memory_nosli) &&
+	    !mem_cgroup_disabled())
+		return ktime_get_ns() - start;
+	else
+		return 0;
+}
+
 static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
 {
 	struct mem_cgroup_threshold_ary *t;
@@ -7754,6 +7777,56 @@ static int __init enable_cgroup_writeback_v1(char *s)
 __setup("cgwb_v1", enable_cgroup_writeback_v1);
 #endif

+static int memsli_enabled_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "%d\n", !static_key_enabled(&cgroup_memory_nosli));
+	return 0;
+}
+
+static int memsli_enabled_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, memsli_enabled_show, NULL);
+}
+
+static ssize_t memsli_enabled_write(struct file *file, const char __user *ubuf,
+				    size_t count, loff_t *ppos)
+{
+	char val = -1;
+	int ret = count;
+
+	if (count < 1 || *ppos) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (copy_from_user(&val, ubuf, 1)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	switch (val) {
+	case '0':
+		static_branch_enable(&cgroup_memory_nosli);
+		break;
+	case '1':
+		static_branch_disable(&cgroup_memory_nosli);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+out:
+	return ret;
+}
+
+static const struct file_operations memsli_enabled_fops = {
+	.open		= memsli_enabled_open,
+	.read		= seq_read,
+	.write		= memsli_enabled_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 /*
  * subsys_initcall() for memory controller.
  *
@@ -7765,6 +7838,7 @@ __setup("cgwb_v1", enable_cgroup_writeback_v1);
 static int __init mem_cgroup_init(void)
 {
 	int cpu, node;
+	struct proc_dir_entry *memsli_dir, *memsli_enabled_file;

 	memcg_wmark_wq = alloc_workqueue("memcg_wmark", WQ_MEM_RECLAIM |
 					 WQ_UNBOUND | WQ_FREEZABLE,
@@ -7773,6 +7847,17 @@ static int __init mem_cgroup_init(void)
 	if (!memcg_wmark_wq)
 		return -ENOMEM;

+	memsli_dir = proc_mkdir("memsli", NULL);
+	if (!memsli_dir)
+		return -ENOMEM;
+
+	memsli_enabled_file = proc_create("enabled", 0600,
+					  memsli_dir, &memsli_enabled_fops);
+	if (!memsli_enabled_file) {
+		remove_proc_entry("memsli", NULL);
+		return -ENOMEM;
+	}
+
 #ifdef CONFIG_MEMCG_KMEM
 	/*
 	 * Kmem cache creation is mostly done with the slab_mutex held,
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4042,11 +4042,13 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	}

 	if (!pte_present(vmf->orig_pte)) {
-		u64 start = ktime_get_ns();
-		vm_fault_t retval = do_swap_page(vmf);
+		vm_fault_t retval;
+		u64 start;

+		memcg_lat_stat_start(&start);
+		retval = do_swap_page(vmf);
 		memcg_lat_stat_update(MEM_LAT_DIRECT_SWAPIN,
-				      (ktime_get_ns() - start));
+				      memcg_lat_stat_end(start));
 		return retval;
 	}
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3726,14 +3726,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		return NULL;

 	psi_memstall_enter(&pflags);
-	start = ktime_get_ns();
+	memcg_lat_stat_start(&start);
 	noreclaim_flag = memalloc_noreclaim_save();

 	*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
 					       prio);

 	memalloc_noreclaim_restore(noreclaim_flag);
-	memcg_lat_stat_update(MEM_LAT_DIRECT_COMPACT, (ktime_get_ns() - start));
+	memcg_lat_stat_update(MEM_LAT_DIRECT_COMPACT,
+			      memcg_lat_stat_end(start));
 	psi_memstall_leave(&pflags);

 	if (*compact_result <= COMPACT_INACTIVE)
@@ -3941,7 +3942,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
 	psi_memstall_enter(&pflags);
-	start = ktime_get_ns();
+	memcg_lat_stat_start(&start);
 	fs_reclaim_acquire(gfp_mask);
 	noreclaim_flag = memalloc_noreclaim_save();
 	reclaim_state.reclaimed_slab = 0;
@@ -3954,7 +3955,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 	memalloc_noreclaim_restore(noreclaim_flag);
 	fs_reclaim_release(gfp_mask);
 	memcg_lat_stat_update(MEM_LAT_GLOBAL_DIRECT_RECLAIM,
-			      (ktime_get_ns() - start));
+			      memcg_lat_stat_end(start));
 	psi_memstall_leave(&pflags);

 	cond_resched();
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1695,10 +1695,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 			count_memcg_event_mm(charge_mm, PGMAJFAULT);
 		}
 		/* Here we actually start the io */
-		start = ktime_get_ns();
+		memcg_lat_stat_start(&start);
 		page = shmem_swapin(swap, gfp, info, index);
 		memcg_lat_stat_update(MEM_LAT_DIRECT_SWAPIN,
-				      (ktime_get_ns() - start));
+				      memcg_lat_stat_end(start));
 		if (!page) {
 			error = -ENOMEM;
 			goto failed;
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -916,13 +916,13 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 		SetPageReclaim(page);

 		if (!current_is_kswapd())
-			start = ktime_get_ns();
+			memcg_lat_stat_start(&start);
 		res = mapping->a_ops->writepage(page, &wbc);
 		if (!current_is_kswapd())
 			memcg_lat_stat_update(global_reclaim(sc) ?
 					      MEM_LAT_GLOBAL_DIRECT_SWAPOUT :
 					      MEM_LAT_MEMCG_DIRECT_SWAPOUT,
-					      (ktime_get_ns() - start));
+					      memcg_lat_stat_end(start));
 		if (res < 0)
 			handle_write_error(mapping, page, res);
 		if (res == AOP_WRITEPAGE_ACTIVATE) {
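Taken together, the patch gates all memsli latency accounting behind a
static key and exposes that key through /proc/memsli/enabled. Below is
a minimal userspace sketch of a tool driving that interface. It assumes
only the semantics visible in memsli_enabled_show and
memsli_enabled_write above: the write handler inspects a single leading
'0' (disable) or '1' (enable) at offset zero, and reads return "0\n" or
"1\n". The helper names memsli_set and memsli_get are illustrative.

/* memsli_toggle.c: query and flip /proc/memsli/enabled */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int memsli_set(int enable)
{
	/* The kernel side only examines the first byte, and rejects
	 * writes at a nonzero offset with -EINVAL. */
	char c = enable ? '1' : '0';
	int fd = open("/proc/memsli/enabled", O_WRONLY);
	ssize_t ret;

	if (fd < 0)
		return -1;
	ret = write(fd, &c, 1);
	close(fd);
	return ret == 1 ? 0 : -1;
}

static int memsli_get(void)
{
	/* seq_file read side reports the state as "0\n" or "1\n" */
	char buf[4] = "";
	int fd = open("/proc/memsli/enabled", O_RDONLY);

	if (fd < 0)
		return -1;
	if (read(fd, buf, sizeof(buf) - 1) <= 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return buf[0] == '1';
}

int main(void)
{
	printf("memsli enabled: %d\n", memsli_get());
	if (memsli_set(0))	/* requires root; the proc file is 0600 */
		perror("disable memsli");
	printf("memsli enabled: %d\n", memsli_get());
	return 0;
}

Because the switch is a static key, the disabled path costs only a
patched-out branch in the hot paths instrumented above, which is what
makes a runtime switch cheap enough for the swapin/swapout paths.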