提交 989a1780 编写于 作者: S Steve Wise 提交者: Roland Dreier

RDMA/cxgb3: Correctly serialize peer abort path

Open MPI and other stress testing exposed a few bad bugs in handling
aborts in the middle of a normal close.  Fix these by:

 - serializing abort reply and peer abort processing with disconnect
   processing

 - warning (and ignoring) if ep timer is stopped when it wasn't running

 - cleaning up disconnect path to correctly deal with aborting and
   dead endpoints

 - in iwch_modify_qp(), taking a ref on the ep before releasing the qp
   lock if iwch_ep_disconnect() will be called.  The ref is dropped
   after calling disconnect.
Signed-off-by: NSteve Wise <swise@opengridcomputing.com>
Signed-off-by: NRoland Dreier <rolandd@cisco.com>
上级 e463c7b1
...@@ -125,6 +125,12 @@ static void start_ep_timer(struct iwch_ep *ep) ...@@ -125,6 +125,12 @@ static void start_ep_timer(struct iwch_ep *ep)
static void stop_ep_timer(struct iwch_ep *ep) static void stop_ep_timer(struct iwch_ep *ep)
{ {
PDBG("%s ep %p\n", __func__, ep); PDBG("%s ep %p\n", __func__, ep);
if (!timer_pending(&ep->timer)) {
printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n",
__func__, ep, ep->com.state);
WARN_ON(1);
return;
}
del_timer_sync(&ep->timer); del_timer_sync(&ep->timer);
put_ep(&ep->com); put_ep(&ep->com);
} }
...@@ -1083,8 +1089,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) ...@@ -1083,8 +1089,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{ {
struct iwch_ep *ep = ctx; struct iwch_ep *ep = ctx;
unsigned long flags;
int release = 0;
PDBG("%s ep %p\n", __func__, ep); PDBG("%s ep %p\n", __func__, ep);
BUG_ON(!ep);
/* /*
* We get 2 abort replies from the HW. The first one must * We get 2 abort replies from the HW. The first one must
...@@ -1095,9 +1104,22 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) ...@@ -1095,9 +1104,22 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
return CPL_RET_BUF_DONE; return CPL_RET_BUF_DONE;
} }
close_complete_upcall(ep); spin_lock_irqsave(&ep->com.lock, flags);
state_set(&ep->com, DEAD); switch (ep->com.state) {
release_ep_resources(ep); case ABORTING:
close_complete_upcall(ep);
__state_set(&ep->com, DEAD);
release = 1;
break;
default:
printk(KERN_ERR "%s ep %p state %d\n",
__func__, ep, ep->com.state);
break;
}
spin_unlock_irqrestore(&ep->com.lock, flags);
if (release)
release_ep_resources(ep);
return CPL_RET_BUF_DONE; return CPL_RET_BUF_DONE;
} }
...@@ -1470,7 +1492,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) ...@@ -1470,7 +1492,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct sk_buff *rpl_skb; struct sk_buff *rpl_skb;
struct iwch_qp_attributes attrs; struct iwch_qp_attributes attrs;
int ret; int ret;
int state; int release = 0;
unsigned long flags;
if (is_neg_adv_abort(req->status)) { if (is_neg_adv_abort(req->status)) {
PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep, PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
...@@ -1488,9 +1511,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) ...@@ -1488,9 +1511,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
return CPL_RET_BUF_DONE; return CPL_RET_BUF_DONE;
} }
state = state_read(&ep->com); spin_lock_irqsave(&ep->com.lock, flags);
PDBG("%s ep %p state %u\n", __func__, ep, state); PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
switch (state) { switch (ep->com.state) {
case CONNECTING: case CONNECTING:
break; break;
case MPA_REQ_WAIT: case MPA_REQ_WAIT:
...@@ -1536,21 +1559,25 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) ...@@ -1536,21 +1559,25 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
break; break;
case DEAD: case DEAD:
PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
spin_unlock_irqrestore(&ep->com.lock, flags);
return CPL_RET_BUF_DONE; return CPL_RET_BUF_DONE;
default: default:
BUG_ON(1); BUG_ON(1);
break; break;
} }
dst_confirm(ep->dst); dst_confirm(ep->dst);
if (ep->com.state != ABORTING) {
__state_set(&ep->com, DEAD);
release = 1;
}
spin_unlock_irqrestore(&ep->com.lock, flags);
rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
if (!rpl_skb) { if (!rpl_skb) {
printk(KERN_ERR MOD "%s - cannot allocate skb!\n", printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
__func__); __func__);
dst_release(ep->dst); release = 1;
l2t_release(L2DATA(ep->com.tdev), ep->l2t); goto out;
put_ep(&ep->com);
return CPL_RET_BUF_DONE;
} }
rpl_skb->priority = CPL_PRIORITY_DATA; rpl_skb->priority = CPL_PRIORITY_DATA;
rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
...@@ -1559,10 +1586,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) ...@@ -1559,10 +1586,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
rpl->cmd = CPL_ABORT_NO_RST; rpl->cmd = CPL_ABORT_NO_RST;
cxgb3_ofld_send(ep->com.tdev, rpl_skb); cxgb3_ofld_send(ep->com.tdev, rpl_skb);
if (state != ABORTING) { out:
state_set(&ep->com, DEAD); if (release)
release_ep_resources(ep); release_ep_resources(ep);
}
return CPL_RET_BUF_DONE; return CPL_RET_BUF_DONE;
} }
...@@ -1661,15 +1687,18 @@ static void ep_timeout(unsigned long arg) ...@@ -1661,15 +1687,18 @@ static void ep_timeout(unsigned long arg)
struct iwch_ep *ep = (struct iwch_ep *)arg; struct iwch_ep *ep = (struct iwch_ep *)arg;
struct iwch_qp_attributes attrs; struct iwch_qp_attributes attrs;
unsigned long flags; unsigned long flags;
int abort = 1;
spin_lock_irqsave(&ep->com.lock, flags); spin_lock_irqsave(&ep->com.lock, flags);
PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
ep->com.state); ep->com.state);
switch (ep->com.state) { switch (ep->com.state) {
case MPA_REQ_SENT: case MPA_REQ_SENT:
__state_set(&ep->com, ABORTING);
connect_reply_upcall(ep, -ETIMEDOUT); connect_reply_upcall(ep, -ETIMEDOUT);
break; break;
case MPA_REQ_WAIT: case MPA_REQ_WAIT:
__state_set(&ep->com, ABORTING);
break; break;
case CLOSING: case CLOSING:
case MORIBUND: case MORIBUND:
...@@ -1679,13 +1708,17 @@ static void ep_timeout(unsigned long arg) ...@@ -1679,13 +1708,17 @@ static void ep_timeout(unsigned long arg)
ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1); &attrs, 1);
} }
__state_set(&ep->com, ABORTING);
break; break;
default: default:
BUG(); printk(KERN_ERR "%s unexpected state ep %p state %u\n",
__func__, ep, ep->com.state);
WARN_ON(1);
abort = 0;
} }
__state_set(&ep->com, CLOSING);
spin_unlock_irqrestore(&ep->com.lock, flags); spin_unlock_irqrestore(&ep->com.lock, flags);
abort_connection(ep, NULL, GFP_ATOMIC); if (abort)
abort_connection(ep, NULL, GFP_ATOMIC);
put_ep(&ep->com); put_ep(&ep->com);
} }
...@@ -1968,40 +2001,39 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) ...@@ -1968,40 +2001,39 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
states[ep->com.state], abrupt); states[ep->com.state], abrupt);
if (ep->com.state == DEAD) {
PDBG("%s already dead ep %p\n", __func__, ep);
goto out;
}
if (abrupt) {
if (ep->com.state != ABORTING) {
ep->com.state = ABORTING;
close = 1;
}
goto out;
}
switch (ep->com.state) { switch (ep->com.state) {
case MPA_REQ_WAIT: case MPA_REQ_WAIT:
case MPA_REQ_SENT: case MPA_REQ_SENT:
case MPA_REQ_RCVD: case MPA_REQ_RCVD:
case MPA_REP_SENT: case MPA_REP_SENT:
case FPDU_MODE: case FPDU_MODE:
start_ep_timer(ep);
ep->com.state = CLOSING;
close = 1; close = 1;
if (abrupt)
ep->com.state = ABORTING;
else {
ep->com.state = CLOSING;
start_ep_timer(ep);
}
break; break;
case CLOSING: case CLOSING:
ep->com.state = MORIBUND;
close = 1; close = 1;
if (abrupt) {
stop_ep_timer(ep);
ep->com.state = ABORTING;
} else
ep->com.state = MORIBUND;
break; break;
case MORIBUND: case MORIBUND:
case ABORTING:
case DEAD:
PDBG("%s ignoring disconnect ep %p state %u\n",
__func__, ep, ep->com.state);
break; break;
default: default:
BUG(); BUG();
break; break;
} }
out:
spin_unlock_irqrestore(&ep->com.lock, flags); spin_unlock_irqrestore(&ep->com.lock, flags);
if (close) { if (close) {
if (abrupt) if (abrupt)
......
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#define put_ep(ep) { \ #define put_ep(ep) { \
PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
ep, atomic_read(&((ep)->kref.refcount))); \ ep, atomic_read(&((ep)->kref.refcount))); \
WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
kref_put(&((ep)->kref), __free_ep); \ kref_put(&((ep)->kref), __free_ep); \
} }
......
...@@ -832,6 +832,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, ...@@ -832,6 +832,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
abort=0; abort=0;
disconnect = 1; disconnect = 1;
ep = qhp->ep; ep = qhp->ep;
get_ep(&ep->com);
} }
flush_qp(qhp, &flag); flush_qp(qhp, &flag);
break; break;
...@@ -848,6 +849,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, ...@@ -848,6 +849,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
abort=1; abort=1;
disconnect = 1; disconnect = 1;
ep = qhp->ep; ep = qhp->ep;
get_ep(&ep->com);
} }
goto err; goto err;
break; break;
...@@ -929,8 +931,10 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, ...@@ -929,8 +931,10 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
* on the EP. This can be a normal close (RTS->CLOSING) or * on the EP. This can be a normal close (RTS->CLOSING) or
* an abnormal close (RTS/CLOSING->ERROR). * an abnormal close (RTS/CLOSING->ERROR).
*/ */
if (disconnect) if (disconnect) {
iwch_ep_disconnect(ep, abort, GFP_KERNEL); iwch_ep_disconnect(ep, abort, GFP_KERNEL);
put_ep(&ep->com);
}
/* /*
* If free is 1, then we've disassociated the EP from the QP * If free is 1, then we've disassociated the EP from the QP
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册