提交 1c111b6c 编写于 作者: O OGAWA Hirofumi 提交者: Greg Kroah-Hartman

xhci: Fix race related to abort operation

Current abort operation has race.

    xhci_handle_command_timeout()
      xhci_abort_cmd_ring()
        xhci_write_64(CMD_RING_ABORT)
        xhci_handshake(5s)
	  do {
	    check CMD_RING_RUNNING
            udelay(1)
					 ...
					 COMP_CMD_ABORT event
					 COMP_CMD_STOP event
					 xhci_handle_stopped_cmd_ring()
					   restart cmd_ring
                                           CMD_RING_RUNNING become 1 again
	  } while ()
          return -ETIMEDOUT
        xhci_write_64(CMD_RING_ABORT)
        /* can abort random command */

To do abort operation correctly, we have to wait both of COMP_CMD_STOP
event and negation of CMD_RING_RUNNING.

But like above, while timeout handler is waiting negation of
CMD_RING_RUNNING, event handler can restart cmd_ring. So timeout
handler never be notice negation of CMD_RING_RUNNING, and retry of
CMD_RING_ABORT can abort random command (BTW, I guess retry of
CMD_RING_ABORT was workaround of this race).

To fix this race, this moves xhci_handle_stopped_cmd_ring() to
xhci_abort_cmd_ring().  And timeout handler waits COMP_CMD_STOP event.

At this point, timeout handler is owner of cmd_ring, and safely
restart cmd_ring by using xhci_handle_stopped_cmd_ring().

[FWIW, as bonus, this way would be easily extend to add CMD_RING_PAUSE
operation]

[locks edited as patch is rebased on other locking fixes -Mathias]
Signed-off-by: NOGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: NMathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: NGreg Kroah-Hartman <gregkh@linuxfoundation.org>
上级 cb4d5ce5
master alk-4.19.24 alk-4.19.30 alk-4.19.34 alk-4.19.36 alk-4.19.43 alk-4.19.48 alk-4.19.57 ck-4.19.67 ck-4.19.81 ck-4.19.91 github/fork/deepanshu1422/fix-typo-in-comment github/fork/haosdent/fix-typo linux-next v4.19.91 v4.19.90 v4.19.89 v4.19.88 v4.19.87 v4.19.86 v4.19.85 v4.19.84 v4.19.83 v4.19.82 v4.19.81 v4.19.80 v4.19.79 v4.19.78 v4.19.77 v4.19.76 v4.19.75 v4.19.74 v4.19.73 v4.19.72 v4.19.71 v4.19.70 v4.19.69 v4.19.68 v4.19.67 v4.19.66 v4.19.65 v4.19.64 v4.19.63 v4.19.62 v4.19.61 v4.19.60 v4.19.59 v4.19.58 v4.19.57 v4.19.56 v4.19.55 v4.19.54 v4.19.53 v4.19.52 v4.19.51 v4.19.50 v4.19.49 v4.19.48 v4.19.47 v4.19.46 v4.19.45 v4.19.44 v4.19.43 v4.19.42 v4.19.41 v4.19.40 v4.19.39 v4.19.38 v4.19.37 v4.19.36 v4.19.35 v4.19.34 v4.19.33 v4.19.32 v4.19.31 v4.19.30 v4.19.29 v4.19.28 v4.19.27 v4.19.26 v4.19.25 v4.19.24 v4.19.23 v4.19.22 v4.19.21 v4.19.20 v4.19.19 v4.19.18 v4.19.17 v4.19.16 v4.19.15 v4.19.14 v4.19.13 v4.19.12 v4.19.11 v4.19.10 v4.19.9 v4.19.8 v4.19.7 v4.19.6 v4.19.5 v4.19.4 v4.19.3 v4.19.2 v4.19.1 v4.19 v4.19-rc8 v4.19-rc7 v4.19-rc6 v4.19-rc5 v4.19-rc4 v4.19-rc3 v4.19-rc2 v4.19-rc1 ck-release-21 ck-release-20 ck-release-19.2 ck-release-19.1 ck-release-19 ck-release-18 ck-release-17.2 ck-release-17.1 ck-release-17 ck-release-16 ck-release-15.1 ck-release-15 ck-release-14 ck-release-13.2 ck-release-13 ck-release-12 ck-release-11 ck-release-10 ck-release-9 ck-release-7 alk-release-15 alk-release-14 alk-release-13.2 alk-release-13 alk-release-12 alk-release-11 alk-release-10 alk-release-9 alk-release-7
无相关合并请求
......@@ -2378,6 +2378,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
/* init command timeout work */
INIT_DELAYED_WORK(&xhci->cmd_timer, xhci_handle_command_timeout);
init_completion(&xhci->cmd_ring_stop_completion);
page_size = readl(&xhci->op_regs->page_size);
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
......
......@@ -284,23 +284,71 @@ static bool xhci_mod_cmd_timer(struct xhci_hcd *xhci, unsigned long delay)
return mod_delayed_work(system_wq, &xhci->cmd_timer, delay);
}
static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
static struct xhci_command *xhci_next_queued_cmd(struct xhci_hcd *xhci)
{
return list_first_entry_or_null(&xhci->cmd_list, struct xhci_command,
cmd_list);
}
/*
* Turn all commands on command ring with status set to "aborted" to no-op trbs.
* If there are other commands waiting then restart the ring and kick the timer.
* This must be called with command ring stopped and xhci->lock held.
*/
static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
struct xhci_command *cur_cmd)
{
struct xhci_command *i_cmd;
u32 cycle_state;
/* Turn all aborted commands in list to no-ops, then restart */
list_for_each_entry(i_cmd, &xhci->cmd_list, cmd_list) {
if (i_cmd->status != COMP_CMD_ABORT)
continue;
i_cmd->status = COMP_CMD_STOP;
xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
i_cmd->command_trb);
/* get cycle state from the original cmd trb */
cycle_state = le32_to_cpu(
i_cmd->command_trb->generic.field[3]) & TRB_CYCLE;
/* modify the command trb to no-op command */
i_cmd->command_trb->generic.field[0] = 0;
i_cmd->command_trb->generic.field[1] = 0;
i_cmd->command_trb->generic.field[2] = 0;
i_cmd->command_trb->generic.field[3] = cpu_to_le32(
TRB_TYPE(TRB_CMD_NOOP) | cycle_state);
/*
* caller waiting for completion is called when command
* completion event is received for these no-op commands
*/
}
xhci->cmd_ring_state = CMD_RING_STATE_RUNNING;
/* ring command ring doorbell to restart the command ring */
if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) &&
!(xhci->xhc_state & XHCI_STATE_DYING)) {
xhci->current_cmd = cur_cmd;
xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT);
xhci_ring_cmd_db(xhci);
}
}
/* Must be called with xhci->lock held, releases and aquires lock back */
static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags)
{
u64 temp_64;
int ret;
xhci_dbg(xhci, "Abort command ring\n");
temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
reinit_completion(&xhci->cmd_ring_stop_completion);
/*
* Writing the CMD_RING_ABORT bit should cause a cmd completion event,
* however on some host hw the CMD_RING_RUNNING bit is correctly cleared
* but the completion event in never sent. Use the cmd timeout timer to
* handle those cases. Use twice the time to cover the bit polling retry
*/
xhci_mod_cmd_timer(xhci, 2 * XHCI_CMD_DEFAULT_TIMEOUT);
temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
&xhci->op_regs->cmd_ring);
......@@ -320,17 +368,30 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
udelay(1000);
ret = xhci_handshake(&xhci->op_regs->cmd_ring,
CMD_RING_RUNNING, 0, 3 * 1000 * 1000);
if (ret == 0)
return 0;
xhci_err(xhci, "Stopped the command ring failed, "
"maybe the host is dead\n");
cancel_delayed_work(&xhci->cmd_timer);
xhci->xhc_state |= XHCI_STATE_DYING;
xhci_halt(xhci);
return -ESHUTDOWN;
if (ret < 0) {
xhci_err(xhci, "Stopped the command ring failed, "
"maybe the host is dead\n");
xhci->xhc_state |= XHCI_STATE_DYING;
xhci_halt(xhci);
return -ESHUTDOWN;
}
}
/*
* Writing the CMD_RING_ABORT bit should cause a cmd completion event,
* however on some host hw the CMD_RING_RUNNING bit is correctly cleared
* but the completion event in never sent. Wait 2 secs (arbitrary
* number) to handle those cases after negation of CMD_RING_RUNNING.
*/
spin_unlock_irqrestore(&xhci->lock, flags);
ret = wait_for_completion_timeout(&xhci->cmd_ring_stop_completion,
msecs_to_jiffies(2000));
spin_lock_irqsave(&xhci->lock, flags);
if (!ret) {
xhci_dbg(xhci, "No stop event for abort, ring start fail?\n");
xhci_cleanup_command_queue(xhci);
} else {
xhci_handle_stopped_cmd_ring(xhci, xhci_next_queued_cmd(xhci));
}
return 0;
}
......@@ -1212,64 +1273,12 @@ void xhci_cleanup_command_queue(struct xhci_hcd *xhci)
xhci_complete_del_and_free_cmd(cur_cmd, COMP_CMD_ABORT);
}
/*
* Turn all commands on command ring with status set to "aborted" to no-op trbs.
* If there are other commands waiting then restart the ring and kick the timer.
* This must be called with command ring stopped and xhci->lock held.
*/
static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
struct xhci_command *cur_cmd)
{
struct xhci_command *i_cmd, *tmp_cmd;
u32 cycle_state;
/* Turn all aborted commands in list to no-ops, then restart */
list_for_each_entry_safe(i_cmd, tmp_cmd, &xhci->cmd_list,
cmd_list) {
if (i_cmd->status != COMP_CMD_ABORT)
continue;
i_cmd->status = COMP_CMD_STOP;
xhci_dbg(xhci, "Turn aborted command %p to no-op\n",
i_cmd->command_trb);
/* get cycle state from the original cmd trb */
cycle_state = le32_to_cpu(
i_cmd->command_trb->generic.field[3]) & TRB_CYCLE;
/* modify the command trb to no-op command */
i_cmd->command_trb->generic.field[0] = 0;
i_cmd->command_trb->generic.field[1] = 0;
i_cmd->command_trb->generic.field[2] = 0;
i_cmd->command_trb->generic.field[3] = cpu_to_le32(
TRB_TYPE(TRB_CMD_NOOP) | cycle_state);
/*
* caller waiting for completion is called when command
* completion event is received for these no-op commands
*/
}
xhci->cmd_ring_state = CMD_RING_STATE_RUNNING;
/* ring command ring doorbell to restart the command ring */
if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) &&
!(xhci->xhc_state & XHCI_STATE_DYING)) {
xhci->current_cmd = cur_cmd;
xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT);
xhci_ring_cmd_db(xhci);
}
return;
}
void xhci_handle_command_timeout(struct work_struct *work)
{
struct xhci_hcd *xhci;
int ret;
unsigned long flags;
u64 hw_ring_state;
bool second_timeout = false;
xhci = container_of(to_delayed_work(work), struct xhci_hcd, cmd_timer);
......@@ -1283,18 +1292,17 @@ void xhci_handle_command_timeout(struct work_struct *work)
spin_unlock_irqrestore(&xhci->lock, flags);
return;
}
/* mark this command to be cancelled */
if (xhci->current_cmd->status == COMP_CMD_ABORT)
second_timeout = true;
xhci->current_cmd->status = COMP_CMD_ABORT;
/* Make sure command ring is running before aborting it */
hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) &&
(hw_ring_state & CMD_RING_RUNNING)) {
/* Prevent new doorbell, and start command abort */
xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
xhci_dbg(xhci, "Command timeout\n");
ret = xhci_abort_cmd_ring(xhci);
ret = xhci_abort_cmd_ring(xhci, flags);
if (unlikely(ret == -ESHUTDOWN)) {
xhci_err(xhci, "Abort command ring failed\n");
xhci_cleanup_command_queue(xhci);
......@@ -1308,9 +1316,9 @@ void xhci_handle_command_timeout(struct work_struct *work)
goto time_out_completed;
}
/* command ring failed to restart, or host removed. Bail out */
if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) {
xhci_dbg(xhci, "command timed out twice, ring start fail?\n");
/* host removed. Bail out */
if (xhci->xhc_state & XHCI_STATE_REMOVING) {
xhci_dbg(xhci, "host removed, ring start fail?\n");
xhci_cleanup_command_queue(xhci);
goto time_out_completed;
......@@ -1360,7 +1368,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
/* If CMD ring stopped we own the trbs between enqueue and dequeue */
if (cmd_comp_code == COMP_CMD_STOP) {
xhci_handle_stopped_cmd_ring(xhci, cmd);
complete_all(&xhci->cmd_ring_stop_completion);
return;
}
......
......@@ -1569,6 +1569,7 @@ struct xhci_hcd {
struct list_head cmd_list;
unsigned int cmd_ring_reserved_trbs;
struct delayed_work cmd_timer;
struct completion cmd_ring_stop_completion;
struct xhci_command *current_cmd;
struct xhci_ring *event_ring;
struct xhci_erst erst;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部