提交 892970b7 编写于 作者: X Xu Yu

alinux: mm, memcg: add memsli procfs switch interface

to #26424368

Since memsli also records latency histogram for swapout and swapin,
which are NOT in the slow memory path, the overhead of memsli could
be non-negligible in some specific scenarios.

For example, in scenarios with frequent swapping out and in, memsli
could introduce overhead of ~1% of total run time of the synthetic
testcase.

This adds procfs interface for memsli switch. The memsli feature is
enabled by default, and you can now disable it by:

$ echo 0 > /proc/memsli/enabled

Similarly, you can check the current memsli switch status by:

$ cat /proc/memsli/enabled

Note that disabling memsli at runtime will NOT clear the existing
latency histogram. You still need to manually reset the specified
latency histogram(s) by echo 0 into the corresponding cgroup control
file(s).
Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
Reviewed-by: Yang Shi <yang.shi@linux.alibaba.com>
Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
上级 77663a9d
...@@ -937,6 +937,8 @@ void memcg_check_wmark_min_adj(struct task_struct *curr, ...@@ -937,6 +937,8 @@ void memcg_check_wmark_min_adj(struct task_struct *curr,
struct alloc_context *ac); struct alloc_context *ac);
extern void memcg_lat_stat_update(enum mem_lat_stat_item sidx, u64 duration); extern void memcg_lat_stat_update(enum mem_lat_stat_item sidx, u64 duration);
extern void memcg_lat_stat_start(u64 *start);
extern u64 memcg_lat_stat_end(u64 start);
#else /* CONFIG_MEMCG */ #else /* CONFIG_MEMCG */
#define MEM_CGROUP_ID_SHIFT 0 #define MEM_CGROUP_ID_SHIFT 0
...@@ -1292,6 +1294,15 @@ static inline void memcg_lat_stat_update(enum mem_lat_stat_item sidx, ...@@ -1292,6 +1294,15 @@ static inline void memcg_lat_stat_update(enum mem_lat_stat_item sidx,
u64 duration) u64 duration)
{ {
} }
/* No-op stub used when CONFIG_MEMCG is disabled: @start is left untouched. */
static inline void memcg_lat_stat_start(u64 *start)
{
}
/* Stub used when CONFIG_MEMCG is disabled: always reports a zero duration. */
static inline u64 memcg_lat_stat_end(u64 start)
{
	return 0;
}
#endif /* CONFIG_MEMCG */ #endif /* CONFIG_MEMCG */
/* idx can be of type enum memcg_stat_item or node_stat_item */ /* idx can be of type enum memcg_stat_item or node_stat_item */
......
...@@ -71,6 +71,7 @@ ...@@ -71,6 +71,7 @@
#include <net/sock.h> #include <net/sock.h>
#include <net/ip.h> #include <net/ip.h>
#include "slab.h" #include "slab.h"
#include <linux/proc_fs.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
...@@ -89,6 +90,9 @@ static bool cgroup_memory_nosocket; ...@@ -89,6 +90,9 @@ static bool cgroup_memory_nosocket;
/* Kernel memory accounting disabled? */ /* Kernel memory accounting disabled? */
static bool cgroup_memory_nokmem; static bool cgroup_memory_nokmem;
/* Cgroup memory SLI disabled? */
static DEFINE_STATIC_KEY_FALSE(cgroup_memory_nosli);
/* Whether the swap controller is active */ /* Whether the swap controller is active */
#ifdef CONFIG_MEMCG_SWAP #ifdef CONFIG_MEMCG_SWAP
int do_swap_account __read_mostly; int do_swap_account __read_mostly;
...@@ -2461,7 +2465,7 @@ void mem_cgroup_handle_over_high(void) ...@@ -2461,7 +2465,7 @@ void mem_cgroup_handle_over_high(void)
return; return;
memcg = get_mem_cgroup_from_mm(current->mm); memcg = get_mem_cgroup_from_mm(current->mm);
start = ktime_get_ns(); memcg_lat_stat_start(&start);
reclaim_high(memcg, nr_pages, GFP_KERNEL); reclaim_high(memcg, nr_pages, GFP_KERNEL);
current->memcg_nr_pages_over_high = 0; current->memcg_nr_pages_over_high = 0;
...@@ -2532,7 +2536,7 @@ void mem_cgroup_handle_over_high(void) ...@@ -2532,7 +2536,7 @@ void mem_cgroup_handle_over_high(void)
out: out:
memcg_lat_stat_update(MEM_LAT_MEMCG_DIRECT_RECLAIM, memcg_lat_stat_update(MEM_LAT_MEMCG_DIRECT_RECLAIM,
(ktime_get_ns() - start)); memcg_lat_stat_end(start));
css_put(&memcg->css); css_put(&memcg->css);
} }
...@@ -2608,11 +2612,11 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, ...@@ -2608,11 +2612,11 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
memcg_memory_event(mem_over_limit, MEMCG_MAX); memcg_memory_event(mem_over_limit, MEMCG_MAX);
start = ktime_get_ns(); memcg_lat_stat_start(&start);
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages, nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
gfp_mask, may_swap); gfp_mask, may_swap);
memcg_lat_stat_update(MEM_LAT_MEMCG_DIRECT_RECLAIM, memcg_lat_stat_update(MEM_LAT_MEMCG_DIRECT_RECLAIM,
(ktime_get_ns() - start)); memcg_lat_stat_end(start));
if (mem_cgroup_margin(mem_over_limit) >= nr_pages) if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
goto retry; goto retry;
...@@ -4627,6 +4631,9 @@ void memcg_lat_stat_update(enum mem_lat_stat_item sidx, u64 duration) ...@@ -4627,6 +4631,9 @@ void memcg_lat_stat_update(enum mem_lat_stat_item sidx, u64 duration)
struct mem_cgroup *memcg, *iter; struct mem_cgroup *memcg, *iter;
enum mem_lat_count_t cidx; enum mem_lat_count_t cidx;
if (static_branch_unlikely(&cgroup_memory_nosli))
return;
if (mem_cgroup_disabled()) if (mem_cgroup_disabled())
return; return;
...@@ -4641,6 +4648,22 @@ void memcg_lat_stat_update(enum mem_lat_stat_item sidx, u64 duration) ...@@ -4641,6 +4648,22 @@ void memcg_lat_stat_update(enum mem_lat_stat_item sidx, u64 duration)
css_put(&memcg->css); css_put(&memcg->css);
} }
/*
 * Record the current timestamp (ns) in @start for memsli latency
 * accounting.  @start is left untouched when memsli is switched off
 * (cgroup_memory_nosli static key enabled) or when the memory
 * controller is disabled; the paired memcg_lat_stat_end() returns 0
 * in those cases, so the unset value is never consumed.
 */
void memcg_lat_stat_start(u64 *start)
{
	if (static_branch_unlikely(&cgroup_memory_nosli))
		return;
	if (mem_cgroup_disabled())
		return;

	*start = ktime_get_ns();
}
/*
 * Return the time elapsed since @start, in nanoseconds, for memsli
 * latency accounting.  Returns 0 when memsli is switched off
 * (cgroup_memory_nosli static key enabled) or when the memory
 * controller is disabled, so callers feed a zero duration into
 * memcg_lat_stat_update().
 *
 * Note: drop the redundant "else" after "return" (checkpatch style)
 * and fold both guard conditions into a single early return.
 */
u64 memcg_lat_stat_end(u64 start)
{
	if (static_branch_unlikely(&cgroup_memory_nosli) ||
	    mem_cgroup_disabled())
		return 0;

	return ktime_get_ns() - start;
}
static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap) static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
{ {
struct mem_cgroup_threshold_ary *t; struct mem_cgroup_threshold_ary *t;
...@@ -7754,6 +7777,56 @@ static int __init enable_cgroup_writeback_v1(char *s) ...@@ -7754,6 +7777,56 @@ static int __init enable_cgroup_writeback_v1(char *s)
__setup("cgwb_v1", enable_cgroup_writeback_v1); __setup("cgwb_v1", enable_cgroup_writeback_v1);
#endif #endif
/*
 * Show the current memsli switch state via /proc/memsli/enabled:
 * "1" when enabled, "0" when disabled.  The static key records the
 * *disabled* state, hence the negation.
 */
static int memsli_enabled_show(struct seq_file *m, void *v)
{
	int enabled = !static_key_enabled(&cgroup_memory_nosli);

	seq_printf(m, "%d\n", enabled);

	return 0;
}
/* Open handler for /proc/memsli/enabled: plain single_open() wrapper. */
static int memsli_enabled_open(struct inode *inode, struct file *file)
{
	return single_open(file, memsli_enabled_show, NULL);
}
/*
 * Write handler for /proc/memsli/enabled.
 *
 * Accepts input whose first byte is '0' (disable memsli) or '1'
 * (enable memsli); any other first byte yields -EINVAL.  Only the
 * first byte is examined, and writes at a non-zero offset are
 * rejected.  Returns @count on success.
 *
 * Fix: the success return value was stored in an "int", silently
 * narrowing the size_t @count even though the fop returns ssize_t;
 * use ssize_t throughout.  The "goto out" indirection is dropped --
 * there is no cleanup to run on the early-error paths.
 */
static ssize_t memsli_enabled_write(struct file *file, const char __user *ubuf,
				    size_t count, loff_t *ppos)
{
	char val = -1;
	ssize_t ret = count;

	if (count < 1 || *ppos)
		return -EINVAL;

	if (copy_from_user(&val, ubuf, 1))
		return -EFAULT;

	switch (val) {
	case '0':
		/* The static key tracks the *disabled* state. */
		static_branch_enable(&cgroup_memory_nosli);
		break;
	case '1':
		static_branch_disable(&cgroup_memory_nosli);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
/* File operations for /proc/memsli/enabled (seq_file-backed read, custom write). */
static const struct file_operations memsli_enabled_fops = {
	.open		= memsli_enabled_open,
	.read		= seq_read,
	.write		= memsli_enabled_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};
/* /*
* subsys_initcall() for memory controller. * subsys_initcall() for memory controller.
* *
...@@ -7765,6 +7838,7 @@ __setup("cgwb_v1", enable_cgroup_writeback_v1); ...@@ -7765,6 +7838,7 @@ __setup("cgwb_v1", enable_cgroup_writeback_v1);
static int __init mem_cgroup_init(void) static int __init mem_cgroup_init(void)
{ {
int cpu, node; int cpu, node;
struct proc_dir_entry *memsli_dir, *memsli_enabled_file;
memcg_wmark_wq = alloc_workqueue("memcg_wmark", WQ_MEM_RECLAIM | memcg_wmark_wq = alloc_workqueue("memcg_wmark", WQ_MEM_RECLAIM |
WQ_UNBOUND | WQ_FREEZABLE, WQ_UNBOUND | WQ_FREEZABLE,
...@@ -7773,6 +7847,17 @@ static int __init mem_cgroup_init(void) ...@@ -7773,6 +7847,17 @@ static int __init mem_cgroup_init(void)
if (!memcg_wmark_wq) if (!memcg_wmark_wq)
return -ENOMEM; return -ENOMEM;
memsli_dir = proc_mkdir("memsli", NULL);
if (!memsli_dir)
return -ENOMEM;
memsli_enabled_file = proc_create("enabled", 0600,
memsli_dir, &memsli_enabled_fops);
if (!memsli_enabled_file) {
remove_proc_entry("memsli", NULL);
return -ENOMEM;
}
#ifdef CONFIG_MEMCG_KMEM #ifdef CONFIG_MEMCG_KMEM
/* /*
* Kmem cache creation is mostly done with the slab_mutex held, * Kmem cache creation is mostly done with the slab_mutex held,
......
...@@ -4042,11 +4042,13 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) ...@@ -4042,11 +4042,13 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
} }
if (!pte_present(vmf->orig_pte)) { if (!pte_present(vmf->orig_pte)) {
u64 start = ktime_get_ns(); vm_fault_t retval;
vm_fault_t retval = do_swap_page(vmf); u64 start;
memcg_lat_stat_start(&start);
retval = do_swap_page(vmf);
memcg_lat_stat_update(MEM_LAT_DIRECT_SWAPIN, memcg_lat_stat_update(MEM_LAT_DIRECT_SWAPIN,
(ktime_get_ns() - start)); memcg_lat_stat_end(start));
return retval; return retval;
} }
......
...@@ -3726,14 +3726,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, ...@@ -3726,14 +3726,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
return NULL; return NULL;
psi_memstall_enter(&pflags); psi_memstall_enter(&pflags);
start = ktime_get_ns(); memcg_lat_stat_start(&start);
noreclaim_flag = memalloc_noreclaim_save(); noreclaim_flag = memalloc_noreclaim_save();
*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac, *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
prio); prio);
memalloc_noreclaim_restore(noreclaim_flag); memalloc_noreclaim_restore(noreclaim_flag);
memcg_lat_stat_update(MEM_LAT_DIRECT_COMPACT, (ktime_get_ns() - start)); memcg_lat_stat_update(MEM_LAT_DIRECT_COMPACT,
memcg_lat_stat_end(start));
psi_memstall_leave(&pflags); psi_memstall_leave(&pflags);
if (*compact_result <= COMPACT_INACTIVE) if (*compact_result <= COMPACT_INACTIVE)
...@@ -3941,7 +3942,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, ...@@ -3941,7 +3942,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
/* We now go into synchronous reclaim */ /* We now go into synchronous reclaim */
cpuset_memory_pressure_bump(); cpuset_memory_pressure_bump();
psi_memstall_enter(&pflags); psi_memstall_enter(&pflags);
start = ktime_get_ns(); memcg_lat_stat_start(&start);
fs_reclaim_acquire(gfp_mask); fs_reclaim_acquire(gfp_mask);
noreclaim_flag = memalloc_noreclaim_save(); noreclaim_flag = memalloc_noreclaim_save();
reclaim_state.reclaimed_slab = 0; reclaim_state.reclaimed_slab = 0;
...@@ -3954,7 +3955,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, ...@@ -3954,7 +3955,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
memalloc_noreclaim_restore(noreclaim_flag); memalloc_noreclaim_restore(noreclaim_flag);
fs_reclaim_release(gfp_mask); fs_reclaim_release(gfp_mask);
memcg_lat_stat_update(MEM_LAT_GLOBAL_DIRECT_RECLAIM, memcg_lat_stat_update(MEM_LAT_GLOBAL_DIRECT_RECLAIM,
(ktime_get_ns() - start)); memcg_lat_stat_end(start));
psi_memstall_leave(&pflags); psi_memstall_leave(&pflags);
cond_resched(); cond_resched();
......
...@@ -1695,10 +1695,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, ...@@ -1695,10 +1695,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
count_memcg_event_mm(charge_mm, PGMAJFAULT); count_memcg_event_mm(charge_mm, PGMAJFAULT);
} }
/* Here we actually start the io */ /* Here we actually start the io */
start = ktime_get_ns(); memcg_lat_stat_start(&start);
page = shmem_swapin(swap, gfp, info, index); page = shmem_swapin(swap, gfp, info, index);
memcg_lat_stat_update(MEM_LAT_DIRECT_SWAPIN, memcg_lat_stat_update(MEM_LAT_DIRECT_SWAPIN,
(ktime_get_ns() - start)); memcg_lat_stat_end(start));
if (!page) { if (!page) {
error = -ENOMEM; error = -ENOMEM;
goto failed; goto failed;
......
...@@ -916,13 +916,13 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, ...@@ -916,13 +916,13 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
SetPageReclaim(page); SetPageReclaim(page);
if (!current_is_kswapd()) if (!current_is_kswapd())
start = ktime_get_ns(); memcg_lat_stat_start(&start);
res = mapping->a_ops->writepage(page, &wbc); res = mapping->a_ops->writepage(page, &wbc);
if (!current_is_kswapd()) if (!current_is_kswapd())
memcg_lat_stat_update(global_reclaim(sc) ? memcg_lat_stat_update(global_reclaim(sc) ?
MEM_LAT_GLOBAL_DIRECT_SWAPOUT : MEM_LAT_GLOBAL_DIRECT_SWAPOUT :
MEM_LAT_MEMCG_DIRECT_SWAPOUT, MEM_LAT_MEMCG_DIRECT_SWAPOUT,
(ktime_get_ns() - start)); memcg_lat_stat_end(start));
if (res < 0) if (res < 0)
handle_write_error(mapping, page, res); handle_write_error(mapping, page, res);
if (res == AOP_WRITEPAGE_ACTIVATE) { if (res == AOP_WRITEPAGE_ACTIVATE) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册