Commit 8962f125 authored by zhongjiang-ali, committed by Xu Yu

alinux: mm: further restrict the print message frequency when memcg oom triggers

to #24843736

Printing too many memcg OOM messages can trigger a softlockup. Currently
the same ratelimit state, oom_rs, is shared between system-wide and memcg
OOM to throttle these messages. However, a memcg hits its limit far more
often than the whole system does, so memcg OOMs occur much more frequently,
a large amount of information is printed, and that is likely to trigger a
softlockup.

This patch uses a separate ratelimit state for memcg OOM, distinct from the
system-wide one. With the default values for the memcg ratelimit, the issue
no longer reproduces in our testing.

[xuyu: adjust corresponding sysctl indexes]
Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
Signed-off-by: zhongjiang-ali <zhongjiang-ali@linux.alibaba.com>
Parent f5d6c930
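For context, the throttling described above relies on the kernel's ratelimit helper: a struct ratelimit_state permits at most `burst` messages per `interval` jiffies, and __ratelimit() returns false once that budget is spent, so the caller skips the expensive dump. The following is a simplified userspace model of that behaviour, not the kernel's ___ratelimit() (seconds instead of jiffies, no locking, illustrative values only); it shows why giving memcg OOM its own, tighter state bounds console output even when memcg OOMs fire back to back.

/*
 * Illustrative userspace model of the throttling used by this patch.
 * This is NOT the kernel implementation; it only demonstrates the
 * semantics: at most `burst` dumps are allowed per `interval`, everything
 * beyond that is suppressed and counted.
 */
#include <stdio.h>
#include <time.h>

struct ratelimit_state {
	time_t begin;	/* start of the current interval */
	int interval;	/* interval length, in seconds */
	int burst;	/* messages allowed within one interval */
	int printed;	/* messages emitted in this interval */
	int missed;	/* messages suppressed in this interval */
};

static int ratelimit(struct ratelimit_state *rs)
{
	time_t now = time(NULL);

	if (!rs->begin)
		rs->begin = now;
	if (now - rs->begin >= rs->interval) {	/* interval over: reset */
		if (rs->missed)
			fprintf(stderr, "%d callbacks suppressed\n", rs->missed);
		rs->begin = now;
		rs->printed = 0;
		rs->missed = 0;
	}
	if (rs->printed < rs->burst) {
		rs->printed++;
		return 1;	/* caller may print the dump */
	}
	rs->missed++;
	return 0;		/* suppress */
}

int main(void)
{
	/* illustrative values mirroring the patch: 5 dumps per 10 seconds */
	struct ratelimit_state memcg_rs = { .interval = 10, .burst = 5 };
	int i;

	for (i = 0; i < 100; i++)
		if (ratelimit(&memcg_rs))
			printf("memcg OOM dump %d\n", i);
	return 0;
}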
@@ -72,6 +72,7 @@ ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags)
 }
 
 extern struct ratelimit_state printk_ratelimit_state;
+extern struct ratelimit_state oom_memcg_rs;
 
 extern int ___ratelimit(struct ratelimit_state *rs, const char *func);
 #define __ratelimit(state) ___ratelimit(state, __func__)
@@ -153,6 +153,8 @@ enum
 	KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
 	KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
 	KERN_PANIC_ON_WARN=77, /* int: call panic() in WARN() functions */
+	KERN_PRINTK_MEMCG_OOM_INTERVAL=78, /* int: tune printk oom ratelimiting */
+	KERN_PRINTK_MEMCG_OOM_BURST=79, /* int: tune printk oom ratelimiting */
 };
@@ -820,6 +820,20 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "printk_memcg_oom_interval",
+		.data		= &oom_memcg_rs.interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "printk_memcg_oom_burst",
+		.data		= &oom_memcg_rs.burst,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "printk_ratelimit",
 		.data		= &printk_ratelimit_state.interval,
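Since these two entries are added to kern_table (presumably kernel/sysctl.c), the tunables should surface under /proc/sys/kernel/. Note the handlers: proc_dointvec_jiffies means printk_memcg_oom_interval is read and written in seconds and stored internally in jiffies, while printk_memcg_oom_burst is a plain integer count. Below is a hypothetical userspace helper that dumps the current values; the paths are inferred from the table above and exist only on a kernel carrying this patch.

/*
 * Hypothetical helper that prints the new memcg-OOM ratelimit tunables.
 * The /proc/sys paths are inferred from the kern_table entries above.
 */
#include <stdio.h>

static void show(const char *path)
{
	char buf[64];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s = %s", path, buf);
	fclose(f);
}

int main(void)
{
	show("/proc/sys/kernel/printk_memcg_oom_interval");	/* seconds */
	show("/proc/sys/kernel/printk_memcg_oom_burst");	/* message count */
	return 0;
}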
@@ -122,6 +122,9 @@ static const struct bin_table bin_kern_table[] = {
 	{ CTL_INT,	KERN_HPPA_PWRSW,		"soft-power" },
 	{ CTL_INT,	KERN_HPPA_UNALIGNED,		"unaligned-trap" },
 
+	{ CTL_INT,	KERN_PRINTK_MEMCG_OOM_INTERVAL,	"printk_memcg_oom_interval"},
+	{ CTL_INT,	KERN_PRINTK_MEMCG_OOM_BURST,	"printk_memcg_oom_burst"},
+
 	{ CTL_INT,	KERN_PRINTK_RATELIMIT,		"printk_ratelimit" },
 	{ CTL_INT,	KERN_PRINTK_RATELIMIT_BURST,	"printk_ratelimit_burst" },
@@ -434,7 +434,7 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
 	rcu_read_unlock();
 }
 
-static void dump_header(struct oom_control *oc, struct task_struct *p)
+static void dump_header(struct oom_control *oc)
 {
 	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=%*pbl, order=%d, oom_score_adj=%hd\n",
 		current->comm, oc->gfp_mask, &oc->gfp_mask,
@@ -445,23 +445,32 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
 
 	cpuset_print_current_mems_allowed();
 	dump_stack();
-	if (is_memcg_oom(oc))
-		mem_cgroup_print_oom_info(oc->memcg, p);
-	else {
+}
+
+static void dump_global_header(struct oom_control *oc, struct task_struct *p)
+{
+	dump_header(oc);
 #ifdef CONFIG_MEMCG
-		if (!p)
-			p = current;
+	if (!p)
+		p = current;
 
-		pr_info("Task in ");
-		rcu_read_lock();
-		pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id));
-		rcu_read_unlock();
-		pr_cont(" killed as a result of limit of host\n");
+	pr_info("Task in ");
+	rcu_read_lock();
+	pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id));
+	rcu_read_unlock();
+	pr_cont(" killed as a result of limit of host\n");
 #endif
-		show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
-		if (is_dump_unreclaim_slabs())
-			dump_unreclaimable_slab();
-	}
+	show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
+	if (is_dump_unreclaim_slabs())
+		dump_unreclaimable_slab();
+	if (sysctl_oom_dump_tasks)
+		dump_tasks(oc->memcg, oc->nodemask);
+}
+
+static void dump_memcg_header(struct oom_control *oc, struct task_struct *p)
+{
+	dump_header(oc);
+	mem_cgroup_print_oom_info(oc->memcg, p);
 	if (sysctl_oom_dump_tasks)
 		dump_tasks(oc->memcg, oc->nodemask);
 }
@@ -939,6 +948,8 @@ static int oom_kill_memcg_member(struct task_struct *task, void *unused)
 	return 0;
 }
 
+DEFINE_RATELIMIT_STATE(oom_memcg_rs, 10 * HZ, 5);
+
 static void oom_kill_process(struct oom_control *oc, const char *message)
 {
 	struct task_struct *p = oc->chosen;
@@ -948,7 +959,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	struct task_struct *t;
 	struct mem_cgroup *oom_group;
 	unsigned int victim_points = 0;
-	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
+	static DEFINE_RATELIMIT_STATE(oom_global_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
 
 	/*
@@ -966,8 +977,10 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	}
 	task_unlock(p);
 
-	if (__ratelimit(&oom_rs))
-		dump_header(oc, p);
+	if (is_memcg_oom(oc) && __ratelimit(&oom_memcg_rs))
+		dump_memcg_header(oc, p);
+	else if (!is_memcg_oom(oc) && __ratelimit(&oom_global_rs))
+		dump_global_header(oc, p);
 
 	pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
 		message, task_pid_nr(p), p->comm, points);
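With the hunks above applied, memcg OOM dumps are throttled by oom_memcg_rs (10 * HZ interval, burst 5, tunable through the new printk_memcg_oom_* sysctls), while system-wide OOM keeps the stock DEFAULT_RATELIMIT_INTERVAL / DEFAULT_RATELIMIT_BURST budget, which is 5 * HZ and 10 in include/linux/ratelimit.h. An annotated restatement of the dispatch, with the effective default budgets spelled out (the comments are editorial, not part of the patch):

	if (is_memcg_oom(oc) && __ratelimit(&oom_memcg_rs))
		dump_memcg_header(oc, p);	/* at most 5 dumps per 10 seconds */
	else if (!is_memcg_oom(oc) && __ratelimit(&oom_global_rs))
		dump_global_header(oc, p);	/* at most 10 dumps per 5 seconds */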
@@ -1047,7 +1060,7 @@ static void check_panic_on_oom(struct oom_control *oc,
 	/* Do not panic for oom kills triggered by sysrq */
 	if (is_sysrq_oom(oc))
 		return;
-	dump_header(oc, NULL);
+	dump_global_header(oc, NULL);
 	panic("Out of memory: %s panic_on_oom is enabled\n",
 		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
 }
@@ -1132,7 +1145,7 @@ bool out_of_memory(struct oom_control *oc)
 	select_bad_process(oc);
 	/* Found nothing?!?! */
 	if (!oc->chosen) {
-		dump_header(oc, NULL);
+		dump_global_header(oc, NULL);
 		pr_warn("Out of memory and no killable processes...\n");
 		/*
 		 * If we got here due to an actual allocation at the