From f14304cdc95560fcc670bbf404c7555d3aaa0d73 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Fri, 16 Nov 2018 11:57:17 +0800 Subject: [PATCH] alinux: kernel: Reduce tasklist_lock contention at fork and exit to #16966300 to #16966377 We observed heavy tasklist_lock contention when offline tasks start and end, which caused long scheduling latency and long periods with interrupts off since write_lock_irq() is called. In extreme cases with tons of concurrent fork and exit events, it can cause the system to hang. This patch changes them to use trylock; the operations within the lock consume little time, so this naturally addresses the issue. After this patch, when I launched and killed thousands of tasks, the latency was reduced from tens of milliseconds to around 2ms on my box. The patch passes the UnixBench tests with no regressions introduced. Reviewed-by: Shanpei Chen Signed-off-by: Xunlei Pang Acked-by: Shile Zhang --- kernel/exit.c | 9 ++++++++- kernel/fork.c | 7 ++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index c536da853873..1ddcc422bccd 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -710,10 +710,17 @@ static void forget_original_parent(struct task_struct *father, static void exit_notify(struct task_struct *tsk, int group_dead) { bool autoreap; + unsigned long flags; struct task_struct *p, *n; LIST_HEAD(dead); - write_lock_irq(&tasklist_lock); +retry: + if (!write_trylock_irqsave(&tasklist_lock, flags)) { + if (!in_atomic() && !irqs_disabled()) + cond_resched(); + goto retry; + } + forget_original_parent(tsk, &dead); if (group_dead) diff --git a/kernel/fork.c b/kernel/fork.c index 23302e2bc7bf..4fd1f435ce86 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1658,6 +1658,7 @@ static __latent_entropy struct task_struct *copy_process( int retval; struct task_struct *p; struct multiprocess_signals delayed; + unsigned long flags; /* * Don't allow sharing the root directory with processes in a different @@ -1986,7 +1987,11 @@ 
static __latent_entropy struct task_struct *copy_process( * Make it visible to the rest of the system, but dont wake it up yet. * Need tasklist lock for parent etc handling! */ - write_lock_irq(&tasklist_lock); +retry: + if (!write_trylock_irqsave(&tasklist_lock, flags)) { + cond_resched(); + goto retry; + } /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { -- GitLab