提交 692b4825 编写于 作者: T Tejun Heo

workqueue: replace pool->manager_arb mutex with a flag

Josef reported a HARDIRQ-safe -> HARDIRQ-unsafe lock order detected by
lockdep:

 [ 1270.472259] WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected
 [ 1270.472783] 4.14.0-rc1-xfstests-12888-g76833e8 #110 Not tainted
 [ 1270.473240] -----------------------------------------------------
 [ 1270.473710] kworker/u5:2/5157 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
 [ 1270.474239]  (&(&lock->wait_lock)->rlock){+.+.}, at: [<ffffffff8da253d2>] __mutex_unlock_slowpath+0xa2/0x280
 [ 1270.474994]
 [ 1270.474994] and this task is already holding:
 [ 1270.475440]  (&pool->lock/1){-.-.}, at: [<ffffffff8d2992f6>] worker_thread+0x366/0x3c0
 [ 1270.476046] which would create a new lock dependency:
 [ 1270.476436]  (&pool->lock/1){-.-.} -> (&(&lock->wait_lock)->rlock){+.+.}
 [ 1270.476949]
 [ 1270.476949] but this new dependency connects a HARDIRQ-irq-safe lock:
 [ 1270.477553]  (&pool->lock/1){-.-.}
 ...
 [ 1270.488900] to a HARDIRQ-irq-unsafe lock:
 [ 1270.489327]  (&(&lock->wait_lock)->rlock){+.+.}
 ...
 [ 1270.494735]  Possible interrupt unsafe locking scenario:
 [ 1270.494735]
 [ 1270.495250]        CPU0                    CPU1
 [ 1270.495600]        ----                    ----
 [ 1270.495947]   lock(&(&lock->wait_lock)->rlock);
 [ 1270.496295]                                local_irq_disable();
 [ 1270.496753]                                lock(&pool->lock/1);
 [ 1270.497205]                                lock(&(&lock->wait_lock)->rlock);
 [ 1270.497744]   <Interrupt>
 [ 1270.497948]     lock(&pool->lock/1);

, which will cause a irq inversion deadlock if the above lock scenario
happens.

The root cause of this safe -> unsafe lock order is the
mutex_unlock(pool->manager_arb) in manage_workers() with pool->lock
held.

Unlocking mutex while holding an irq spinlock was never safe and this
problem has been around forever but it never got noticed because the
only time the mutex is usually trylocked while holding irqlock making
actual failures very unlikely and lockdep annotation missed the
condition until the recent b9c16a0e ("locking/mutex: Fix
lockdep_assert_held() fail").

Using mutex for pool->manager_arb has always been a bit of stretch.
It primarily is an mechanism to arbitrate managership between workers
which can easily be done with a pool flag.  The only reason it became
a mutex is that pool destruction path wants to exclude parallel
managing operations.

This patch replaces the mutex with a new pool flag POOL_MANAGER_ACTIVE
and make the destruction path wait for the current manager on a wait
queue.

v2: Drop unnecessary flag clearing before pool destruction as
    suggested by Boqun.
Signed-off-by: NTejun Heo <tj@kernel.org>
Reported-by: NJosef Bacik <josef@toxicpanda.com>
Reviewed-by: NLai Jiangshan <jiangshanlai@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: stable@vger.kernel.org
上级 47684e11
...@@ -68,6 +68,7 @@ enum { ...@@ -68,6 +68,7 @@ enum {
* attach_mutex to avoid changing binding state while * attach_mutex to avoid changing binding state while
* worker_attach_to_pool() is in progress. * worker_attach_to_pool() is in progress.
*/ */
POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */
POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
/* worker flags */ /* worker flags */
...@@ -165,7 +166,6 @@ struct worker_pool { ...@@ -165,7 +166,6 @@ struct worker_pool {
/* L: hash of busy workers */ /* L: hash of busy workers */
/* see manage_workers() for details on the two manager mutexes */ /* see manage_workers() for details on the two manager mutexes */
struct mutex manager_arb; /* manager arbitration */
struct worker *manager; /* L: purely informational */ struct worker *manager; /* L: purely informational */
struct mutex attach_mutex; /* attach/detach exclusion */ struct mutex attach_mutex; /* attach/detach exclusion */
struct list_head workers; /* A: attached workers */ struct list_head workers; /* A: attached workers */
...@@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf; ...@@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
static LIST_HEAD(workqueues); /* PR: list of all workqueues */ static LIST_HEAD(workqueues); /* PR: list of all workqueues */
static bool workqueue_freezing; /* PL: have wqs started freezing? */ static bool workqueue_freezing; /* PL: have wqs started freezing? */
...@@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool) ...@@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool)
/* Do we have too many workers and should some go away? */ /* Do we have too many workers and should some go away? */
static bool too_many_workers(struct worker_pool *pool) static bool too_many_workers(struct worker_pool *pool)
{ {
bool managing = mutex_is_locked(&pool->manager_arb); bool managing = pool->flags & POOL_MANAGER_ACTIVE;
int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
int nr_busy = pool->nr_workers - nr_idle; int nr_busy = pool->nr_workers - nr_idle;
...@@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker) ...@@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker)
{ {
struct worker_pool *pool = worker->pool; struct worker_pool *pool = worker->pool;
/* if (pool->flags & POOL_MANAGER_ACTIVE)
* Anyone who successfully grabs manager_arb wins the arbitration
* and becomes the manager. mutex_trylock() on pool->manager_arb
* failure while holding pool->lock reliably indicates that someone
* else is managing the pool and the worker which failed trylock
* can proceed to executing work items. This means that anyone
* grabbing manager_arb is responsible for actually performing
* manager duties. If manager_arb is grabbed and released without
* actual management, the pool may stall indefinitely.
*/
if (!mutex_trylock(&pool->manager_arb))
return false; return false;
pool->flags |= POOL_MANAGER_ACTIVE;
pool->manager = worker; pool->manager = worker;
maybe_create_worker(pool); maybe_create_worker(pool);
pool->manager = NULL; pool->manager = NULL;
mutex_unlock(&pool->manager_arb); pool->flags &= ~POOL_MANAGER_ACTIVE;
wake_up(&wq_manager_wait);
return true; return true;
} }
...@@ -3248,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool) ...@@ -3248,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool)
setup_timer(&pool->mayday_timer, pool_mayday_timeout, setup_timer(&pool->mayday_timer, pool_mayday_timeout,
(unsigned long)pool); (unsigned long)pool);
mutex_init(&pool->manager_arb);
mutex_init(&pool->attach_mutex); mutex_init(&pool->attach_mutex);
INIT_LIST_HEAD(&pool->workers); INIT_LIST_HEAD(&pool->workers);
...@@ -3318,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool) ...@@ -3318,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool)
hash_del(&pool->hash_node); hash_del(&pool->hash_node);
/* /*
* Become the manager and destroy all workers. Grabbing * Become the manager and destroy all workers. This prevents
* manager_arb prevents @pool's workers from blocking on * @pool's workers from blocking on attach_mutex. We're the last
* attach_mutex. * manager and @pool gets freed with the flag set.
*/ */
mutex_lock(&pool->manager_arb);
spin_lock_irq(&pool->lock); spin_lock_irq(&pool->lock);
wait_event_lock_irq(wq_manager_wait,
!(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
pool->flags |= POOL_MANAGER_ACTIVE;
while ((worker = first_idle_worker(pool))) while ((worker = first_idle_worker(pool)))
destroy_worker(worker); destroy_worker(worker);
WARN_ON(pool->nr_workers || pool->nr_idle); WARN_ON(pool->nr_workers || pool->nr_idle);
...@@ -3338,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool) ...@@ -3338,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool)
if (pool->detach_completion) if (pool->detach_completion)
wait_for_completion(pool->detach_completion); wait_for_completion(pool->detach_completion);
mutex_unlock(&pool->manager_arb);
/* shut down the timers */ /* shut down the timers */
del_timer_sync(&pool->idle_timer); del_timer_sync(&pool->idle_timer);
del_timer_sync(&pool->mayday_timer); del_timer_sync(&pool->mayday_timer);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册