提交 f7e1cb6e 编写于 作者: J Johannes Weiner 提交者: Linus Torvalds

mm: memcontrol: account socket memory in unified hierarchy memory controller

Socket memory can be a significant share of overall memory consumed by
common workloads.  In order to provide reasonable resource isolation in
the unified hierarchy, this type of memory needs to be included in the
tracking/accounting of a cgroup under active memory resource control.

Overhead is only incurred when a non-root control group is created AND
the memory controller is instructed to track and account the memory
footprint of that group.  cgroup.memory=nosocket can be specified on the
boot commandline to override any runtime configuration and forcibly
exclude socket memory from active memory resource control.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 11092087
...@@ -608,6 +608,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ...@@ -608,6 +608,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
cut the overhead, others just disable the usage. So cut the overhead, others just disable the usage. So
only cgroup_disable=memory is actually worthy} only cgroup_disable=memory is actually worthy}
cgroup.memory= [KNL] Pass options to the cgroup memory controller.
Format: <string>
nosocket -- Disable socket memory accounting.
checkreqprot [SELINUX] Set initial checkreqprot flag value. checkreqprot [SELINUX] Set initial checkreqprot flag value.
Format: { "0" | "1" } Format: { "0" | "1" }
See security/selinux/Kconfig help text. See security/selinux/Kconfig help text.
......
...@@ -170,6 +170,9 @@ struct mem_cgroup { ...@@ -170,6 +170,9 @@ struct mem_cgroup {
unsigned long low; unsigned long low;
unsigned long high; unsigned long high;
/* Range enforcement for interrupt charges */
struct work_struct high_work;
unsigned long soft_limit; unsigned long soft_limit;
/* vmpressure notifications */ /* vmpressure notifications */
...@@ -680,12 +683,16 @@ void sock_update_memcg(struct sock *sk); ...@@ -680,12 +683,16 @@ void sock_update_memcg(struct sock *sk);
void sock_release_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk);
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) #if defined(CONFIG_MEMCG) && defined(CONFIG_INET)
extern struct static_key memcg_sockets_enabled_key; extern struct static_key memcg_sockets_enabled_key;
#define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key) #define mem_cgroup_sockets_enabled static_key_false(&memcg_sockets_enabled_key)
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{ {
#ifdef CONFIG_MEMCG_KMEM
return memcg->tcp_mem.memory_pressure; return memcg->tcp_mem.memory_pressure;
#else
return false;
#endif
} }
#else #else
#define mem_cgroup_sockets_enabled 0 #define mem_cgroup_sockets_enabled 0
......
...@@ -80,6 +80,9 @@ struct mem_cgroup *root_mem_cgroup __read_mostly; ...@@ -80,6 +80,9 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
#define MEM_CGROUP_RECLAIM_RETRIES 5 #define MEM_CGROUP_RECLAIM_RETRIES 5
/* Socket memory accounting disabled? */
static bool cgroup_memory_nosocket;
/* Whether the swap controller is active */ /* Whether the swap controller is active */
#ifdef CONFIG_MEMCG_SWAP #ifdef CONFIG_MEMCG_SWAP
int do_swap_account __read_mostly; int do_swap_account __read_mostly;
...@@ -1945,6 +1948,26 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb, ...@@ -1945,6 +1948,26 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
return NOTIFY_OK; return NOTIFY_OK;
} }
/*
 * Walk from @memcg up through its ancestors (via parent_mem_cgroup())
 * and, for every group whose memory counter currently exceeds its
 * ->high value, record a MEMCG_HIGH event and ask reclaim to free
 * @nr_pages pages using @gfp_mask.
 *
 * Groups at or below their ->high value are skipped, but the walk still
 * continues to their parents.
 */
static void reclaim_high(struct mem_cgroup *memcg,
			 unsigned int nr_pages,
			 gfp_t gfp_mask)
{
	do {
		if (page_counter_read(&memcg->memory) > memcg->high) {
			mem_cgroup_events(memcg, MEMCG_HIGH, 1);
			try_to_free_mem_cgroup_pages(memcg, nr_pages,
						     gfp_mask, true);
		}
		/* Step to the parent; the walk ends when there is none. */
		memcg = parent_mem_cgroup(memcg);
	} while (memcg);
}
/*
 * Work item callback for mem_cgroup::high_work.
 *
 * Recovers the mem_cgroup that embeds @work and reclaims CHARGE_BATCH
 * pages with GFP_KERNEL from it and any ancestors that are over their
 * high value (see reclaim_high()).
 */
static void high_work_func(struct work_struct *work)
{
	struct mem_cgroup *owner = container_of(work, struct mem_cgroup,
						high_work);

	reclaim_high(owner, CHARGE_BATCH, GFP_KERNEL);
}
/* /*
* Scheduled by try_charge() to be executed from the userland return path * Scheduled by try_charge() to be executed from the userland return path
* and reclaims memory over the high limit. * and reclaims memory over the high limit.
...@@ -1952,20 +1975,13 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb, ...@@ -1952,20 +1975,13 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
void mem_cgroup_handle_over_high(void) void mem_cgroup_handle_over_high(void)
{ {
unsigned int nr_pages = current->memcg_nr_pages_over_high; unsigned int nr_pages = current->memcg_nr_pages_over_high;
struct mem_cgroup *memcg, *pos; struct mem_cgroup *memcg;
if (likely(!nr_pages)) if (likely(!nr_pages))
return; return;
pos = memcg = get_mem_cgroup_from_mm(current->mm); memcg = get_mem_cgroup_from_mm(current->mm);
reclaim_high(memcg, nr_pages, GFP_KERNEL);
do {
if (page_counter_read(&pos->memory) <= pos->high)
continue;
mem_cgroup_events(pos, MEMCG_HIGH, 1);
try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true);
} while ((pos = parent_mem_cgroup(pos)));
css_put(&memcg->css); css_put(&memcg->css);
current->memcg_nr_pages_over_high = 0; current->memcg_nr_pages_over_high = 0;
} }
...@@ -2100,6 +2116,11 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, ...@@ -2100,6 +2116,11 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
*/ */
do { do {
if (page_counter_read(&memcg->memory) > memcg->high) { if (page_counter_read(&memcg->memory) > memcg->high) {
/* Don't bother a random interrupted task */
if (in_interrupt()) {
schedule_work(&memcg->high_work);
break;
}
current->memcg_nr_pages_over_high += batch; current->memcg_nr_pages_over_high += batch;
set_notify_resume(current); set_notify_resume(current);
break; break;
...@@ -4150,6 +4171,8 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) ...@@ -4150,6 +4171,8 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
{ {
int node; int node;
cancel_work_sync(&memcg->high_work);
mem_cgroup_remove_from_trees(memcg); mem_cgroup_remove_from_trees(memcg);
for_each_node(node) for_each_node(node)
...@@ -4196,6 +4219,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) ...@@ -4196,6 +4219,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
page_counter_init(&memcg->kmem, NULL); page_counter_init(&memcg->kmem, NULL);
} }
INIT_WORK(&memcg->high_work, high_work_func);
memcg->last_scanned_node = MAX_NUMNODES; memcg->last_scanned_node = MAX_NUMNODES;
INIT_LIST_HEAD(&memcg->oom_notify); INIT_LIST_HEAD(&memcg->oom_notify);
memcg->move_charge_at_immigrate = 0; memcg->move_charge_at_immigrate = 0;
...@@ -4267,6 +4291,11 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) ...@@ -4267,6 +4291,11 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
if (ret) if (ret)
return ret; return ret;
#ifdef CONFIG_INET
if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
static_key_slow_inc(&memcg_sockets_enabled_key);
#endif
/* /*
* Make sure the memcg is initialized: mem_cgroup_iter() * Make sure the memcg is initialized: mem_cgroup_iter()
* orders reading memcg->initialized against its callers * orders reading memcg->initialized against its callers
...@@ -4313,6 +4342,10 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) ...@@ -4313,6 +4342,10 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup *memcg = mem_cgroup_from_css(css);
memcg_destroy_kmem(memcg); memcg_destroy_kmem(memcg);
#ifdef CONFIG_INET
if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
static_key_slow_dec(&memcg_sockets_enabled_key);
#endif
__mem_cgroup_free(memcg); __mem_cgroup_free(memcg);
} }
...@@ -5533,8 +5566,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) ...@@ -5533,8 +5566,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
commit_charge(newpage, memcg, true); commit_charge(newpage, memcg, true);
} }
/* Writing them here to avoid exposing memcg's inner layout */ #ifdef CONFIG_INET
#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
struct static_key memcg_sockets_enabled_key; struct static_key memcg_sockets_enabled_key;
EXPORT_SYMBOL(memcg_sockets_enabled_key); EXPORT_SYMBOL(memcg_sockets_enabled_key);
...@@ -5559,10 +5591,15 @@ void sock_update_memcg(struct sock *sk) ...@@ -5559,10 +5591,15 @@ void sock_update_memcg(struct sock *sk)
rcu_read_lock(); rcu_read_lock();
memcg = mem_cgroup_from_task(current); memcg = mem_cgroup_from_task(current);
if (memcg != root_mem_cgroup && if (memcg == root_mem_cgroup)
memcg->tcp_mem.active && goto out;
css_tryget_online(&memcg->css)) #ifdef CONFIG_MEMCG_KMEM
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !memcg->tcp_mem.active)
goto out;
#endif
if (css_tryget_online(&memcg->css))
sk->sk_memcg = memcg; sk->sk_memcg = memcg;
out:
rcu_read_unlock(); rcu_read_unlock();
} }
EXPORT_SYMBOL(sock_update_memcg); EXPORT_SYMBOL(sock_update_memcg);
...@@ -5583,15 +5620,30 @@ void sock_release_memcg(struct sock *sk) ...@@ -5583,15 +5620,30 @@ void sock_release_memcg(struct sock *sk)
*/ */
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
{ {
struct page_counter *counter; gfp_t gfp_mask = GFP_KERNEL;
if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated, #ifdef CONFIG_MEMCG_KMEM
nr_pages, &counter)) { if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
memcg->tcp_mem.memory_pressure = 0; struct page_counter *counter;
return true;
if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
nr_pages, &counter)) {
memcg->tcp_mem.memory_pressure = 0;
return true;
}
page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
memcg->tcp_mem.memory_pressure = 1;
return false;
} }
page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages); #endif
memcg->tcp_mem.memory_pressure = 1; /* Don't block in the packet receive path */
if (in_softirq())
gfp_mask = GFP_NOWAIT;
if (try_charge(memcg, gfp_mask, nr_pages) == 0)
return true;
try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
return false; return false;
} }
...@@ -5602,10 +5654,32 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) ...@@ -5602,10 +5654,32 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
*/ */
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
{ {
page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages); #ifdef CONFIG_MEMCG_KMEM
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
page_counter_uncharge(&memcg->tcp_mem.memory_allocated,
nr_pages);
return;
}
#endif
page_counter_uncharge(&memcg->memory, nr_pages);
css_put_many(&memcg->css, nr_pages);
} }
#endif #endif /* CONFIG_INET */
/*
 * Parse the "cgroup.memory=" boot command line option.
 *
 * @s: comma-separated list of option tokens; it is modified in place
 *     by strsep().
 *
 * Recognized tokens:
 *   nosocket - sets cgroup_memory_nosocket, disabling socket memory
 *              accounting.
 *
 * Empty tokens (e.g. from "a,,b" or a trailing comma) are skipped and
 * unrecognized tokens are silently ignored.
 *
 * Returns 1 to tell the boot-parameter code that the option has been
 * handled. Returning 0 would mark it as unhandled, so it could be
 * reported as an unknown parameter and passed on to init.
 */
static int __init cgroup_memory(char *s)
{
	char *token;

	while ((token = strsep(&s, ",")) != NULL) {
		if (!*token)
			continue;
		if (!strcmp(token, "nosocket"))
			cgroup_memory_nosocket = true;
	}
	return 1;
}
__setup("cgroup.memory=", cgroup_memory);
/* /*
* subsys_initcall() for memory controller. * subsys_initcall() for memory controller.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册