提交 85f1abe0 编写于 作者: P Peter Zijlstra 提交者: Ingo Molnar

kthread, sched/wait: Fix kthread_parkme() completion issue

Even with the wait-loop fixed, there is a further issue with
kthread_parkme(). Upon hotplug, when we do takedown_cpu(),
smpboot_park_threads() can return before all those threads are in fact
blocked, due to the placement of the complete() in __kthread_parkme().

When that happens, sched_cpu_dying() -> migrate_tasks() can end up
migrating such a still runnable task onto another CPU.

Normally the task will have hit schedule() and gone to sleep by the
time we do kthread_unpark(), which will then do __kthread_bind() to
re-bind the task to the correct CPU.

However, when we loose the initial TASK_PARKED store to the concurrent
wakeup issue described previously, do the complete(), get migrated, it
is possible to either:

 - observe kthread_unpark()'s clearing of SHOULD_PARK and terminate
   the park and set TASK_RUNNING, or

 - __kthread_bind()'s wait_task_inactive() to observe the competing
   TASK_RUNNING store.

Either way the WARN() in __kthread_bind() will trigger and fail to
correctly set the CPU affinity.

Fix this by only issuing the complete() when the kthread has scheduled
out. This does away with all the icky 'still running' nonsense.

The alternative is to promote TASK_PARKED to a special state, this
guarantees wait_task_inactive() cannot observe a 'stale' TASK_RUNNING
and we'll end up doing the right thing, but this preserves the whole
icky business of potentially migating the still runnable thing.
Reported-by: NGaurav Kohli <gkohli@codeaurora.org>
Signed-off-by: NPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: NIngo Molnar <mingo@kernel.org>
上级 741a76b3
...@@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k); ...@@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k);
int kthread_park(struct task_struct *k); int kthread_park(struct task_struct *k);
void kthread_unpark(struct task_struct *k); void kthread_unpark(struct task_struct *k);
void kthread_parkme(void); void kthread_parkme(void);
void kthread_park_complete(struct task_struct *k);
int kthreadd(void *unused); int kthreadd(void *unused);
extern struct task_struct *kthreadd_task; extern struct task_struct *kthreadd_task;
......
...@@ -55,7 +55,6 @@ enum KTHREAD_BITS { ...@@ -55,7 +55,6 @@ enum KTHREAD_BITS {
KTHREAD_IS_PER_CPU = 0, KTHREAD_IS_PER_CPU = 0,
KTHREAD_SHOULD_STOP, KTHREAD_SHOULD_STOP,
KTHREAD_SHOULD_PARK, KTHREAD_SHOULD_PARK,
KTHREAD_IS_PARKED,
}; };
static inline void set_kthread_struct(void *kthread) static inline void set_kthread_struct(void *kthread)
...@@ -181,11 +180,8 @@ static void __kthread_parkme(struct kthread *self) ...@@ -181,11 +180,8 @@ static void __kthread_parkme(struct kthread *self)
set_current_state(TASK_PARKED); set_current_state(TASK_PARKED);
if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags)) if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
break; break;
if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
complete(&self->parked);
schedule(); schedule();
} }
clear_bit(KTHREAD_IS_PARKED, &self->flags);
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
} }
...@@ -195,6 +191,11 @@ void kthread_parkme(void) ...@@ -195,6 +191,11 @@ void kthread_parkme(void)
} }
EXPORT_SYMBOL_GPL(kthread_parkme); EXPORT_SYMBOL_GPL(kthread_parkme);
void kthread_park_complete(struct task_struct *k)
{
complete(&to_kthread(k)->parked);
}
static int kthread(void *_create) static int kthread(void *_create)
{ {
/* Copy data: it's on kthread's stack */ /* Copy data: it's on kthread's stack */
...@@ -451,22 +452,15 @@ void kthread_unpark(struct task_struct *k) ...@@ -451,22 +452,15 @@ void kthread_unpark(struct task_struct *k)
{ {
struct kthread *kthread = to_kthread(k); struct kthread *kthread = to_kthread(k);
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
/*
* We clear the IS_PARKED bit here as we don't wait
* until the task has left the park code. So if we'd
* park before that happens we'd see the IS_PARKED bit
* which might be about to be cleared.
*/
if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
/* /*
* Newly created kthread was parked when the CPU was offline. * Newly created kthread was parked when the CPU was offline.
* The binding was lost and we need to set it again. * The binding was lost and we need to set it again.
*/ */
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
__kthread_bind(k, kthread->cpu, TASK_PARKED); __kthread_bind(k, kthread->cpu, TASK_PARKED);
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
wake_up_state(k, TASK_PARKED); wake_up_state(k, TASK_PARKED);
}
} }
EXPORT_SYMBOL_GPL(kthread_unpark); EXPORT_SYMBOL_GPL(kthread_unpark);
...@@ -489,13 +483,14 @@ int kthread_park(struct task_struct *k) ...@@ -489,13 +483,14 @@ int kthread_park(struct task_struct *k)
if (WARN_ON(k->flags & PF_EXITING)) if (WARN_ON(k->flags & PF_EXITING))
return -ENOSYS; return -ENOSYS;
if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) { if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
return -EBUSY;
set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
if (k != current) { if (k != current) {
wake_up_process(k); wake_up_process(k);
wait_for_completion(&kthread->parked); wait_for_completion(&kthread->parked);
} }
}
return 0; return 0;
} }
......
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
*/ */
#include "sched.h" #include "sched.h"
#include <linux/kthread.h>
#include <asm/switch_to.h> #include <asm/switch_to.h>
#include <asm/tlb.h> #include <asm/tlb.h>
...@@ -2718,7 +2720,9 @@ static struct rq *finish_task_switch(struct task_struct *prev) ...@@ -2718,7 +2720,9 @@ static struct rq *finish_task_switch(struct task_struct *prev)
membarrier_mm_sync_core_before_usermode(mm); membarrier_mm_sync_core_before_usermode(mm);
mmdrop(mm); mmdrop(mm);
} }
if (unlikely(prev_state == TASK_DEAD)) { if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
switch (prev_state) {
case TASK_DEAD:
if (prev->sched_class->task_dead) if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev); prev->sched_class->task_dead(prev);
...@@ -2732,6 +2736,12 @@ static struct rq *finish_task_switch(struct task_struct *prev) ...@@ -2732,6 +2736,12 @@ static struct rq *finish_task_switch(struct task_struct *prev)
put_task_stack(prev); put_task_stack(prev);
put_task_struct(prev); put_task_struct(prev);
break;
case TASK_PARKED:
kthread_park_complete(prev);
break;
}
} }
tick_nohz_task_switch(); tick_nohz_task_switch();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册