Unverified commit 6b33e607 authored by openeuler-ci-bot, committed by Gitee

!1441 workqueue: fix sanity check warning when invoking destroy_workqueue()

Merge Pull Request from: @henryze 
 
https://gitee.com/openeuler/kernel/issues/I7LRJF?from=project-issue

The warning logs are listed below:

~~~
WARNING: CPU: 0 PID: 19336 at kernel/workqueue.c:4430 destroy_workqueue+0x11a/0x2f0
*****
destroy_workqueue: test_workqueue9 has the following busy pwq
  pwq 4: cpus=2 node=0 flags=0x0 nice=0 active=0/1 refcnt=2
      in-flight: 5658:wq_barrier_func
Showing busy workqueues and worker pools:
*****
~~~

It shows that even after drain_workqueue() returns, the barrier work item
is still in flight and the pwq (and a worker) is still busy on it.

The problem is caused by drain_workqueue() not watching flush_work():
~~~
Thread A				Worker
					/* normal work item with linked */
					process_scheduled_works()
destroy_workqueue()			  process_one_work()
  drain_workqueue()			    /* run normal work item */
				 /--	    pwq_dec_nr_in_flight()
    flush_workqueue()	    <---/
		/* the last normal work item is done */
  sanity_check				  process_one_work()
				       /--  raw_spin_unlock_irq(&pool->lock)
    raw_spin_lock_irq(&pool->lock)  <-/     /* maybe preempt */
    *WARNING*				    wq_barrier_func()
					    /* maybe preempt by cond_resched() */
~~~
So the solution is to make drain_workqueue() watch for flush_work(),
which means making flush_workqueue() watch for flush_work().

Due to historical convenience, we used WORK_NO_COLOR for barrier work items
queued by flush_work().  The color has two purposes:
	- not participating in flushing
	- not participating in nr_active

Only the second purpose is obligatory.  So the plan is to mark barrier
work items inactive without using WORK_NO_COLOR in patch 4 so that we can
assign a flushing color to them in patch 5.
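For reference, the flush color rides in the high bits of work->data next to
the flag bits.  Below is a minimal user-space sketch of that encoding,
mirroring work_color_to_flags()/get_work_color() from the diff; the shift
and width values assume the layout without CONFIG_DEBUG_OBJECTS_WORK and
are illustrative only:

~~~
#include <stdio.h>

/* Illustrative constants; in the kernel, WORK_STRUCT_COLOR_SHIFT depends
 * on configuration (see include/linux/workqueue.h). */
#define COLOR_SHIFT	4
#define COLOR_BITS	4

static unsigned long color_to_flags(int color)
{
	return (unsigned long)color << COLOR_SHIFT;
}

static int flags_to_color(unsigned long work_data)
{
	return (work_data >> COLOR_SHIFT) & ((1 << COLOR_BITS) - 1);
}

int main(void)
{
	/* Pre-patch, color 15 was reserved as WORK_NO_COLOR, leaving 15
	 * usable flush colors; post-patch, all 16 colors participate in
	 * flushing and barrier work items inherit the color of the work
	 * they flush. */
	unsigned long data = color_to_flags(7);

	printf("encoded color: %d\n", flags_to_color(data)); /* prints 7 */
	return 0;
}
~~~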

Patches 1-3 are preparation, and patch 6 is a cleanup.

Test steps:
insmod wq_issue.ko
rmmod wq_issue
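The wq_issue module itself is not part of this PR, so the sketch below is a
guess at its shape, pieced together from the function names and workqueue
name in the logs; the exact trigger logic is an assumption:

~~~
// Hypothetical reproducer sketch -- the real wq_issue.ko source is not
// included in this PR.  Names (wq_test_init, test_workqueue9, ...) are
// taken from the logs; everything else is an assumption.
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *test_wq;
static struct work_struct test_work;

static void test_work_fn(struct work_struct *work)
{
	cond_resched();	/* widen the preemption window after the work fn */
}

static int __init wq_test_init(void)
{
	pr_info("wq_test_init\n");
	test_wq = alloc_workqueue("test_workqueue9", 0, 1);
	if (!test_wq)
		return -ENOMEM;
	INIT_WORK(&test_work, test_work_fn);
	pr_info("wq_test_init done\n");
	return 0;
}

static void __exit wq_test_exit(void)
{
	/*
	 * Race window: flush_work() returns once wq_barrier_func() has
	 * completed the barrier, but the worker may not yet have finished
	 * its in-flight bookkeeping.  Unpatched, drain_workqueue() inside
	 * destroy_workqueue() does not wait for the barrier, so the
	 * pwq_busy() sanity check can fire.
	 */
	queue_work(test_wq, &test_work);
	flush_work(&test_work);
	destroy_workqueue(test_wq);
	pr_info("wq_test_exit done\n");
}

module_init(wq_test_init);
module_exit(wq_test_exit);
MODULE_LICENSE("GPL");
~~~

On a patched kernel the module loads and unloads cleanly, as in the log below: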

~~~
# insmod wq_issue.ko
[   14.061088] wq_issue: loading out-of-tree module taints kernel.
[   14.070509] wq_test_init
[   14.072112] wq_test_init done
[   14.074035] insmod (92) used greatest stack depth: 13840 bytes left
/tmp # rmmod wq_issue.ko
[   24.489421] wq_test_exit done
/tmp # uname -a
Linux (none) 5.10.0+ #10 SMP Wed Jul 26 15:48:31 CST 2023 x86_64 GNU/Linux
~~~ 
 
Link: https://gitee.com/openeuler/kernel/pulls/1441

Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com> 
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com> 
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -30,7 +30,7 @@ void delayed_work_timer_fn(struct timer_list *t);
 enum {
 	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
-	WORK_STRUCT_DELAYED_BIT	= 1,	/* work item is delayed */
+	WORK_STRUCT_INACTIVE_BIT= 1,	/* work item is inactive */
 	WORK_STRUCT_PWQ_BIT	= 2,	/* data points to pwq */
 	WORK_STRUCT_LINKED_BIT	= 3,	/* next work is linked to this one */
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -43,7 +43,7 @@ enum {
 	WORK_STRUCT_COLOR_BITS	= 4,
 
 	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
-	WORK_STRUCT_DELAYED	= 1 << WORK_STRUCT_DELAYED_BIT,
+	WORK_STRUCT_INACTIVE	= 1 << WORK_STRUCT_INACTIVE_BIT,
 	WORK_STRUCT_PWQ		= 1 << WORK_STRUCT_PWQ_BIT,
 	WORK_STRUCT_LINKED	= 1 << WORK_STRUCT_LINKED_BIT,
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -52,19 +52,14 @@ enum {
 	WORK_STRUCT_STATIC	= 0,
 #endif
 
-	/*
-	 * The last color is no color used for works which don't
-	 * participate in workqueue flushing.
-	 */
-	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS) - 1,
-	WORK_NO_COLOR		= WORK_NR_COLORS,
+	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS),
 
 	/* not bound to any CPU, prefer the local CPU */
 	WORK_CPU_UNBOUND	= NR_CPUS,
 
 	/*
 	 * Reserve 8 bits off of pwq pointer w/ debugobjects turned off.
-	 * This makes pwqs aligned to 256 bytes and allows 15 workqueue
+	 * This makes pwqs aligned to 256 bytes and allows 16 workqueue
 	 * flush colors.
 	 */
 	WORK_STRUCT_FLAG_BITS	= WORK_STRUCT_COLOR_SHIFT +
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -206,9 +206,26 @@ struct pool_workqueue {
 	int			refcnt;		/* L: reference count */
 	int			nr_in_flight[WORK_NR_COLORS];
 						/* L: nr of in_flight works */
+
+	/*
+	 * nr_active management and WORK_STRUCT_INACTIVE:
+	 *
+	 * When pwq->nr_active >= max_active, new work item is queued to
+	 * pwq->inactive_works instead of pool->worklist and marked with
+	 * WORK_STRUCT_INACTIVE.
+	 *
+	 * All work items marked with WORK_STRUCT_INACTIVE do not participate
+	 * in pwq->nr_active and all work items in pwq->inactive_works are
+	 * marked with WORK_STRUCT_INACTIVE.  But not all WORK_STRUCT_INACTIVE
+	 * work items are in pwq->inactive_works.  Some of them are ready to
+	 * run in pool->worklist or worker->scheduled.  Those work items are
+	 * only struct wq_barrier, which is used for flush_work(), and should
+	 * not participate in pwq->nr_active.  A non-barrier work item is
+	 * marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
+	 */
 	int			nr_active;	/* L: nr of active works */
 	int			max_active;	/* L: max active works */
-	struct list_head	delayed_works;	/* L: delayed works */
+	struct list_head	inactive_works;	/* L: inactive works */
 	struct list_head	pwqs_node;	/* WR: node on wq->pwqs */
 	struct list_head	mayday_node;	/* MD: node on wq->maydays */
@@ -580,9 +597,9 @@ static unsigned int work_color_to_flags(int color)
 	return color << WORK_STRUCT_COLOR_SHIFT;
 }
 
-static int get_work_color(struct work_struct *work)
+static int get_work_color(unsigned long work_data)
 {
-	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
+	return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
 		((1 << WORK_STRUCT_COLOR_BITS) - 1);
 }
@@ -1146,7 +1163,7 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
 	}
 }
 
-static void pwq_activate_delayed_work(struct work_struct *work)
+static void pwq_activate_inactive_work(struct work_struct *work)
 {
 	struct pool_workqueue *pwq = get_work_pwq(work);
 
@@ -1154,22 +1171,22 @@ static void pwq_activate_delayed_work(struct work_struct *work)
 	if (list_empty(&pwq->pool->worklist))
 		pwq->pool->watchdog_ts = jiffies;
 	move_linked_works(work, &pwq->pool->worklist, NULL);
-	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
+	__clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
 	pwq->nr_active++;
 }
 
-static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
+static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
 {
-	struct work_struct *work = list_first_entry(&pwq->delayed_works,
+	struct work_struct *work = list_first_entry(&pwq->inactive_works,
 						    struct work_struct, entry);
 
-	pwq_activate_delayed_work(work);
+	pwq_activate_inactive_work(work);
 }
 
 /**
  * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
  * @pwq: pwq of interest
- * @color: color of work which left the queue
+ * @work_data: work_data of work which left the queue
  *
  * A work either has completed or is removed from pending queue,
  * decrement nr_in_flight of its pwq and handle workqueue flushing.
@@ -1177,21 +1194,21 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
  * CONTEXT:
  * raw_spin_lock_irq(pool->lock).
  */
-static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
+static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
 {
-	/* uncolored work items don't participate in flushing or nr_active */
-	if (color == WORK_NO_COLOR)
-		goto out_put;
-
-	pwq->nr_in_flight[color]--;
+	int color = get_work_color(work_data);
 
-	pwq->nr_active--;
-	if (!list_empty(&pwq->delayed_works)) {
-		/* one down, submit a delayed one */
-		if (pwq->nr_active < pwq->max_active)
-			pwq_activate_first_delayed(pwq);
+	if (!(work_data & WORK_STRUCT_INACTIVE)) {
+		pwq->nr_active--;
+		if (!list_empty(&pwq->inactive_works)) {
+			/* one down, submit an inactive one */
+			if (pwq->nr_active < pwq->max_active)
+				pwq_activate_first_inactive(pwq);
+		}
 	}
 
+	pwq->nr_in_flight[color]--;
+
 	/* is flush in progress and are we at the flushing tip? */
 	if (likely(pwq->flush_color != color))
 		goto out_put;
@@ -1291,17 +1308,21 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 		debug_work_deactivate(work);
 
 		/*
-		 * A delayed work item cannot be grabbed directly because
-		 * it might have linked NO_COLOR work items which, if left
-		 * on the delayed_list, will confuse pwq->nr_active
+		 * A cancelable inactive work item must be in the
+		 * pwq->inactive_works since a queued barrier can't be
+		 * canceled (see the comments in insert_wq_barrier()).
+		 *
+		 * An inactive work item cannot be grabbed directly because
+		 * it might have linked barrier work items which, if left
+		 * on the inactive_works list, will confuse pwq->nr_active
 		 * management later on and cause stall.  Make sure the work
 		 * item is activated before grabbing.
 		 */
-		if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
-			pwq_activate_delayed_work(work);
+		if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
+			pwq_activate_inactive_work(work);
 
 		list_del_init(&work->entry);
-		pwq_dec_nr_in_flight(pwq, get_work_color(work));
+		pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
 
 		/* work->data points to pwq iff queued, point to pool */
 		set_work_pool_and_keep_pending(work, pool->id);
@@ -1497,8 +1518,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 		if (list_empty(worklist))
 			pwq->pool->watchdog_ts = jiffies;
 	} else {
-		work_flags |= WORK_STRUCT_DELAYED;
-		worklist = &pwq->delayed_works;
+		work_flags |= WORK_STRUCT_INACTIVE;
+		worklist = &pwq->inactive_works;
 	}
 
 	debug_work_activate(work);
@@ -2177,7 +2198,7 @@ __acquires(&pool->lock)
 	struct pool_workqueue *pwq = get_work_pwq(work);
 	struct worker_pool *pool = worker->pool;
 	bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
-	int work_color;
+	unsigned long work_data;
 	struct worker *collision;
 #ifdef CONFIG_LOCKDEP
 	/*
@@ -2213,7 +2234,8 @@ __acquires(&pool->lock)
 	worker->current_work = work;
 	worker->current_func = work->func;
 	worker->current_pwq = pwq;
-	work_color = get_work_color(work);
+	work_data = *work_data_bits(work);
+	worker->current_color = get_work_color(work_data);
 
 	/*
 	 * Record wq name for cmdline and debug reporting, may get
@@ -2319,7 +2341,8 @@ __acquires(&pool->lock)
 	worker->current_work = NULL;
 	worker->current_func = NULL;
 	worker->current_pwq = NULL;
-	pwq_dec_nr_in_flight(pwq, work_color);
+	worker->current_color = INT_MAX;
+	pwq_dec_nr_in_flight(pwq, work_data);
 }
 
 /**
@@ -2535,7 +2558,7 @@ static int rescuer_thread(void *__rescuer)
 			/*
 			 * The above execution of rescued work items could
 			 * have created more to rescue through
-			 * pwq_activate_first_delayed() or chained
+			 * pwq_activate_first_inactive() or chained
 			 * queueing.  Let's put @pwq back on mayday list so
 			 * that such back-to-back work items, which may be
 			 * being used to relieve memory pressure, don't
@@ -2662,8 +2685,9 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 			      struct wq_barrier *barr,
 			      struct work_struct *target, struct worker *worker)
 {
+	unsigned int work_flags = 0;
+	unsigned int work_color;
 	struct list_head *head;
-	unsigned int linked = 0;
 
 	/*
 	 * debugobject calls are safe here even with pool->lock locked
@@ -2678,24 +2702,31 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 
 	barr->task = current;
 
+	/* The barrier work item does not participate in pwq->nr_active. */
+	work_flags |= WORK_STRUCT_INACTIVE;
+
 	/*
 	 * If @target is currently being executed, schedule the
 	 * barrier to the worker; otherwise, put it after @target.
 	 */
-	if (worker)
+	if (worker) {
 		head = worker->scheduled.next;
-	else {
+		work_color = worker->current_color;
+	} else {
 		unsigned long *bits = work_data_bits(target);
 
 		head = target->entry.next;
 		/* there can already be other linked works, inherit and set */
-		linked = *bits & WORK_STRUCT_LINKED;
+		work_flags |= *bits & WORK_STRUCT_LINKED;
+
+		work_color = get_work_color(*bits);
 		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
 	}
 
+	pwq->nr_in_flight[work_color]++;
+	work_flags |= work_color_to_flags(work_color);
+
 	debug_work_activate(&barr->work);
-	insert_work(pwq, &barr->work, head,
-		    work_color_to_flags(WORK_NO_COLOR) | linked);
+	insert_work(pwq, &barr->work, head, work_flags);
 }
 
 /**
@@ -2961,7 +2992,7 @@ void drain_workqueue(struct workqueue_struct *wq)
 		bool drained;
 
 		raw_spin_lock_irq(&pwq->pool->lock);
-		drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
+		drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
 		raw_spin_unlock_irq(&pwq->pool->lock);
 
 		if (drained)
@@ -3717,7 +3748,7 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
  * @pwq: target pool_workqueue
  *
  * If @pwq isn't freezing, set @pwq->max_active to the associated
- * workqueue's saved_max_active and activate delayed work items
+ * workqueue's saved_max_active and activate inactive work items
  * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
  */
 static void pwq_adjust_max_active(struct pool_workqueue *pwq)
@@ -3746,9 +3777,9 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
 
 		pwq->max_active = wq->saved_max_active;
 
-		while (!list_empty(&pwq->delayed_works) &&
+		while (!list_empty(&pwq->inactive_works) &&
 		       pwq->nr_active < pwq->max_active) {
-			pwq_activate_first_delayed(pwq);
+			pwq_activate_first_inactive(pwq);
 			kick = true;
 		}
@@ -3779,7 +3810,7 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
 	pwq->wq = wq;
 	pwq->flush_color = -1;
 	pwq->refcnt = 1;
-	INIT_LIST_HEAD(&pwq->delayed_works);
+	INIT_LIST_HEAD(&pwq->inactive_works);
 	INIT_LIST_HEAD(&pwq->pwqs_node);
 	INIT_LIST_HEAD(&pwq->mayday_node);
 	INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
@@ -4372,7 +4403,7 @@ static bool pwq_busy(struct pool_workqueue *pwq)
 
 	if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
 		return true;
-	if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+	if (pwq->nr_active || !list_empty(&pwq->inactive_works))
 		return true;
 
 	return false;
@@ -4568,7 +4599,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
 	else
 		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
 
-	ret = !list_empty(&pwq->delayed_works);
+	ret = !list_empty(&pwq->inactive_works);
 	preempt_enable();
 	rcu_read_unlock();
 
@@ -4764,11 +4795,11 @@ static void show_pwq(struct pool_workqueue *pwq)
 		pr_cont("\n");
 	}
 
-	if (!list_empty(&pwq->delayed_works)) {
+	if (!list_empty(&pwq->inactive_works)) {
 		bool comma = false;
 
-		pr_info("    delayed:");
-		list_for_each_entry(work, &pwq->delayed_works, entry) {
+		pr_info("    inactive:");
+		list_for_each_entry(work, &pwq->inactive_works, entry) {
 			pr_cont_work(comma, work);
 			comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
 		}
@@ -4798,7 +4829,7 @@ void show_workqueue_state(void)
 		bool idle = true;
 
 		for_each_pwq(pwq, wq) {
-			if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
+			if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
 				idle = false;
 				break;
 			}
@@ -4810,7 +4841,7 @@ void show_workqueue_state(void)
 
 		for_each_pwq(pwq, wq) {
 			raw_spin_lock_irqsave(&pwq->pool->lock, flags);
-			if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
+			if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
 				/*
 				 * Defer printing to avoid deadlocks in console
 				 * drivers that queue work while holding locks
@@ -5206,7 +5237,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu_safe);
  * freeze_workqueues_begin - begin freezing workqueues
  *
  * Start freezing workqueues.  After this function returns, all freezable
- * workqueues will queue new works to their delayed_works list instead of
+ * workqueues will queue new works to their inactive_works list instead of
  * pool->worklist.
 *
 * CONTEXT:
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -31,7 +31,8 @@ struct worker {
 	struct work_struct	*current_work;	/* L: work being processed */
 	work_func_t		current_func;	/* L: current_work's fn */
 	struct pool_workqueue	*current_pwq;	/* L: current_work's pwq */
+	unsigned int		current_color;	/* L: current_work's color */
 	struct list_head	scheduled;	/* L: scheduled works */
 
 	/* 64 bytes boundary on 64bit, 32 on 32bit */