/* ipoib_cm.c - IPoIB connected mode support */
/*
 * Copyright (c) 2006 Mellanox Technologies. All rights reserved
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <rdma/ib_cm.h>
#include <net/dst.h>
#include <net/icmp.h>
#include <linux/icmpv6.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/moduleparam.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>

#include "ipoib.h"

/* Cap on connected-mode QPs per interface when no SRQ is available. */
int ipoib_max_conn_qp = 128;

module_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444);
MODULE_PARM_DESC(max_nonsrq_conn_qp,
		 "Max number of connected-mode QPs per interface "
		 "(applied only if shared receive queue is not available)");

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
static int data_debug_level;

module_param_named(cm_data_debug_level, data_debug_level, int, 0644);
MODULE_PARM_DESC(cm_data_debug_level,
		 "Enable data path debug tracing for connected mode if > 0");
#endif

#define IPOIB_CM_IETF_ID 0x1000000000000000ULL

#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ)
#define IPOIB_CM_RX_TIMEOUT     (2 * 256 * HZ)
#define IPOIB_CM_RX_DELAY       (3 * 256 * HZ)
#define IPOIB_CM_RX_UPDATE_MASK (0x3)

/* Headroom to reserve so the IP header lands 16-byte aligned. */
#define IPOIB_CM_RX_RESERVE     (ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN)

M
Michael S. Tsirkin 已提交
70 71 72 73
static struct ib_qp_attr ipoib_cm_err_attr = {
	.qp_state = IB_QPS_ERR
};

R
Roland Dreier 已提交
74
#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
M
Michael S. Tsirkin 已提交
75

76 77
static struct ib_send_wr ipoib_cm_rx_drain_wr = {
	.opcode = IB_WR_SEND,
M
Michael S. Tsirkin 已提交
78 79
};

80 81 82
static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
			       struct ib_cm_event *event);

83
static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
84 85 86 87 88 89
				  u64 mapping[IPOIB_CM_RX_SG])
{
	int i;

	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);

90
	for (i = 0; i < frags; ++i)
91
		ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
92 93
}

94
static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
95
{
96
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
97 98 99
	struct ib_recv_wr *bad_wr;
	int i, ret;

100
	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
101

102
	for (i = 0; i < priv->cm.num_frags; ++i)
103 104 105 106 107
		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];

	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
	if (unlikely(ret)) {
		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
108
		ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
109
				      priv->cm.srq_ring[id].mapping);
110 111 112 113 114 115 116
		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
		priv->cm.srq_ring[id].skb = NULL;
	}

	return ret;
}

117
static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
118 119 120
					struct ipoib_cm_rx *rx,
					struct ib_recv_wr *wr,
					struct ib_sge *sge, int id)
121
{
122
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
123 124 125
	struct ib_recv_wr *bad_wr;
	int i, ret;

126
	wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
127 128

	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
129
		sge[i].addr = rx->rx_ring[id].mapping[i];
130

131
	ret = ib_post_recv(rx->qp, wr, &bad_wr);
132 133 134 135 136 137 138 139 140 141 142 143 144 145
	if (unlikely(ret)) {
		ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
				      rx->rx_ring[id].mapping);
		dev_kfree_skb_any(rx->rx_ring[id].skb);
		rx->rx_ring[id].skb = NULL;
	}

	return ret;
}

static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
					     struct ipoib_cm_rx_buf *rx_ring,
					     int id, int frags,
146 147
					     u64 mapping[IPOIB_CM_RX_SG],
					     gfp_t gfp)
148
{
149
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
150 151 152
	struct sk_buff *skb;
	int i;

153
	skb = dev_alloc_skb(ALIGN(IPOIB_CM_HEAD_SIZE + IPOIB_PSEUDO_LEN, 16));
154
	if (unlikely(!skb))
155
		return NULL;
156 157

	/*
158
	 * IPoIB adds a IPOIB_ENCAP_LEN byte header, this will align the
159 160
	 * IP header to a multiple of 16.
	 */
161
	skb_reserve(skb, IPOIB_CM_RX_RESERVE);
162 163 164 165 166

	mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
				       DMA_FROM_DEVICE);
	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
		dev_kfree_skb_any(skb);
167
		return NULL;
168 169
	}

170
	for (i = 0; i < frags; i++) {
171
		struct page *page = alloc_page(gfp);
172 173 174 175 176

		if (!page)
			goto partial_error;
		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);

177
		mapping[i + 1] = ib_dma_map_page(priv->ca, page,
178
						 0, PAGE_SIZE, DMA_FROM_DEVICE);
179 180 181 182
		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
			goto partial_error;
	}

183
	rx_ring[id].skb = skb;
184
	return skb;
185 186 187 188 189

partial_error:

	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);

190
	for (; i > 0; --i)
191
		ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
192

193
	dev_kfree_skb_any(skb);
194
	return NULL;
195 196
}

197 198 199
static void ipoib_cm_free_rx_ring(struct net_device *dev,
				  struct ipoib_cm_rx_buf *rx_ring)
{
200
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
201 202 203 204 205 206 207 208 209
	int i;

	for (i = 0; i < ipoib_recvq_size; ++i)
		if (rx_ring[i].skb) {
			ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
					      rx_ring[i].mapping);
			dev_kfree_skb_any(rx_ring[i].skb);
		}

210
	vfree(rx_ring);
211 212
}

213
static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
M
Michael S. Tsirkin 已提交
214
{
215 216
	struct ib_send_wr *bad_wr;
	struct ipoib_cm_rx *p;
M
Michael S. Tsirkin 已提交
217

218
	/* We only reserved 1 extra slot in CQ for drain WRs, so
M
Michael S. Tsirkin 已提交
219 220 221 222 223
	 * make sure we have at most 1 outstanding WR. */
	if (list_empty(&priv->cm.rx_flush_list) ||
	    !list_empty(&priv->cm.rx_drain_list))
		return;

224 225 226 227 228
	/*
	 * QPs on flush list are error state.  This way, a "flush
	 * error" WC will be immediately generated for each WR we post.
	 */
	p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
229
	ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
230 231
	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
		ipoib_warn(priv, "failed to post drain wr\n");
M
Michael S. Tsirkin 已提交
232 233 234 235 236 237 238

	list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
}

static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
{
	struct ipoib_cm_rx *p = ctx;
239
	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
M
Michael S. Tsirkin 已提交
240 241 242 243 244 245 246 247 248 249 250 251
	unsigned long flags;

	if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
		return;

	spin_lock_irqsave(&priv->lock, flags);
	list_move(&p->list, &priv->cm.rx_flush_list);
	p->state = IPOIB_CM_RX_FLUSH;
	ipoib_cm_start_rx_drain(priv);
	spin_unlock_irqrestore(&priv->lock, flags);
}

252 253 254
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
					   struct ipoib_cm_rx *p)
{
255
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
256
	struct ib_qp_init_attr attr = {
M
Michael S. Tsirkin 已提交
257
		.event_handler = ipoib_cm_rx_event_handler,
258 259
		.send_cq = priv->recv_cq, /* For drain WR */
		.recv_cq = priv->recv_cq,
260
		.srq = priv->cm.srq,
261
		.cap.max_send_wr = 1, /* For drain WR */
262 263 264 265 266
		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
		.sq_sig_type = IB_SIGNAL_ALL_WR,
		.qp_type = IB_QPT_RC,
		.qp_context = p,
	};
267 268 269 270 271 272

	if (!ipoib_cm_has_srq(dev)) {
		attr.cap.max_recv_wr  = ipoib_recvq_size;
		attr.cap.max_recv_sge = IPOIB_CM_RX_SG;
	}

273 274 275 276
	return ib_create_qp(priv->pd, &attr);
}

static int ipoib_cm_modify_rx_qp(struct net_device *dev,
277 278
				 struct ib_cm_id *cm_id, struct ib_qp *qp,
				 unsigned psn)
279
{
280
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
		return ret;
	}
	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}
	qp_attr.rq_psn = psn;
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327

	/*
	 * Current Mellanox HCA firmware won't generate completions
	 * with error for drain WRs unless the QP has been moved to
	 * RTS first. This work-around leaves a window where a QP has
	 * moved to error asynchronously, but this will eventually get
	 * fixed in firmware, so let's not error out if modify QP
	 * fails.
	 */
	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return 0;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return 0;
	}

328 329 330
	return 0;
}

331 332 333 334
static void ipoib_cm_init_rx_wr(struct net_device *dev,
				struct ib_recv_wr *wr,
				struct ib_sge *sge)
{
335
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
336 337 338
	int i;

	for (i = 0; i < priv->cm.num_frags; ++i)
339
		sge[i].lkey = priv->pd->local_dma_lkey;
340 341 342 343 344 345

	sge[0].length = IPOIB_CM_HEAD_SIZE;
	for (i = 1; i < priv->cm.num_frags; ++i)
		sge[i].length = PAGE_SIZE;

	wr->next    = NULL;
346
	wr->sg_list = sge;
347 348 349
	wr->num_sge = priv->cm.num_frags;
}

350 351 352
static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
				   struct ipoib_cm_rx *rx)
{
353
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
354 355 356 357
	struct {
		struct ib_recv_wr wr;
		struct ib_sge sge[IPOIB_CM_RX_SG];
	} *t;
358 359 360
	int ret;
	int i;

361 362
	rx->rx_ring = vzalloc(array_size(ipoib_recvq_size,
					 sizeof(*rx->rx_ring)));
363
	if (!rx->rx_ring)
364
		return -ENOMEM;
365

366 367 368
	t = kmalloc(sizeof *t, GFP_KERNEL);
	if (!t) {
		ret = -ENOMEM;
369
		goto err_free_1;
370 371 372 373
	}

	ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);

374 375 376 377 378 379 380 381 382 383 384 385 386 387
	spin_lock_irq(&priv->lock);

	if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
		spin_unlock_irq(&priv->lock);
		ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0);
		ret = -EINVAL;
		goto err_free;
	} else
		++priv->cm.nonsrq_conn_qp;

	spin_unlock_irq(&priv->lock);

	for (i = 0; i < ipoib_recvq_size; ++i) {
		if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
388 389
					   rx->rx_ring[i].mapping,
					   GFP_KERNEL)) {
390
			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
391 392
			ret = -ENOMEM;
			goto err_count;
393 394
		}
		ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
395 396 397 398 399 400 401 402 403 404
		if (ret) {
			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
				   "failed for buf %d\n", i);
			ret = -EIO;
			goto err_count;
		}
	}

	rx->recv_count = ipoib_recvq_size;

405 406
	kfree(t);

407 408 409 410 411 412 413 414
	return 0;

err_count:
	spin_lock_irq(&priv->lock);
	--priv->cm.nonsrq_conn_qp;
	spin_unlock_irq(&priv->lock);

err_free:
415
	kfree(t);
416 417

err_free_1:
418 419 420 421 422
	ipoib_cm_free_rx_ring(dev, rx->rx_ring);

	return ret;
}

423 424 425 426
static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
			     struct ib_qp *qp, struct ib_cm_req_event_param *req,
			     unsigned psn)
{
427
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
428 429 430 431 432 433 434 435 436 437
	struct ipoib_cm_data data = {};
	struct ib_cm_rep_param rep = {};

	data.qpn = cpu_to_be32(priv->qp->qp_num);
	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);

	rep.private_data = &data;
	rep.private_data_len = sizeof data;
	rep.flow_control = 0;
	rep.rnr_retry_count = req->rnr_retry_count;
438
	rep.srq = ipoib_cm_has_srq(dev);
439 440 441 442 443 444 445 446
	rep.qp_num = qp->qp_num;
	rep.starting_psn = psn;
	return ib_send_cm_rep(cm_id, &rep);
}

static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
	struct net_device *dev = cm_id->context;
447
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
448 449 450 451 452 453 454 455 456 457
	struct ipoib_cm_rx *p;
	unsigned psn;
	int ret;

	ipoib_dbg(priv, "REQ arrived\n");
	p = kzalloc(sizeof *p, GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	p->dev = dev;
	p->id = cm_id;
458 459 460 461 462
	cm_id->context = p;
	p->state = IPOIB_CM_RX_LIVE;
	p->jiffies = jiffies;
	INIT_LIST_HEAD(&p->list);

463 464 465 466 467 468
	p->qp = ipoib_cm_create_rx_qp(dev, p);
	if (IS_ERR(p->qp)) {
		ret = PTR_ERR(p->qp);
		goto err_qp;
	}

469
	psn = prandom_u32() & 0xffffff;
470 471 472 473
	ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
	if (ret)
		goto err_modify;

474 475 476 477 478 479
	if (!ipoib_cm_has_srq(dev)) {
		ret = ipoib_cm_nonsrq_init_rx(dev, cm_id, p);
		if (ret)
			goto err_modify;
	}

480
	spin_lock_irq(&priv->lock);
481
	queue_delayed_work(priv->wq,
482 483 484 485 486 487 488 489
			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
	/* Add this entry to passive ids list head, but do not re-add it
	 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
	p->jiffies = jiffies;
	if (p->state == IPOIB_CM_RX_LIVE)
		list_move(&p->list, &priv->cm.passive_ids);
	spin_unlock_irq(&priv->lock);

490 491 492
	ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
	if (ret) {
		ipoib_warn(priv, "failed to send REP: %d\n", ret);
493 494
		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
			ipoib_warn(priv, "unable to move qp to error state\n");
495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
	}
	return 0;

err_modify:
	ib_destroy_qp(p->qp);
err_qp:
	kfree(p);
	return ret;
}

static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
			       struct ib_cm_event *event)
{
	struct ipoib_cm_rx *p;
	struct ipoib_dev_priv *priv;

	switch (event->event) {
	case IB_CM_REQ_RECEIVED:
		return ipoib_cm_req_handler(cm_id, event);
	case IB_CM_DREQ_RECEIVED:
		ib_send_cm_drep(cm_id, NULL, 0);
		/* Fall through */
	case IB_CM_REJ_RECEIVED:
		p = cm_id->context;
519
		priv = ipoib_priv(p->dev);
M
Michael S. Tsirkin 已提交
520 521 522
		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
			ipoib_warn(priv, "unable to move qp to error state\n");
		/* Fall through */
523 524 525 526 527 528
	default:
		return 0;
	}
}
/* Adjust length of skb with fragments to match received data */
static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
529
			  unsigned int length, struct sk_buff *toskb)
530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545
{
	int i, num_frags;
	unsigned int size;

	/* put header into skb */
	size = min(length, hdr_space);
	skb->tail += size;
	skb->len += size;
	length -= size;

	num_frags = skb_shinfo(skb)->nr_frags;
	for (i = 0; i < num_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		if (length == 0) {
			/* don't need this page */
546 547
			skb_fill_page_desc(toskb, i, skb_frag_page(frag),
					   0, PAGE_SIZE);
548 549
			--skb_shinfo(skb)->nr_frags;
		} else {
550
			size = min_t(unsigned int, length, PAGE_SIZE);
551

E
Eric Dumazet 已提交
552
			skb_frag_size_set(frag, size);
553 554 555 556 557 558 559 560 561 562
			skb->data_len += size;
			skb->truesize += size;
			skb->len += size;
			length -= size;
		}
	}
}

void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
563
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
564
	struct ipoib_cm_rx_buf *rx_ring;
565
	unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
566
	struct sk_buff *skb, *newskb;
567 568 569
	struct ipoib_cm_rx *p;
	unsigned long flags;
	u64 mapping[IPOIB_CM_RX_SG];
570
	int frags;
571
	int has_srq;
572
	struct sk_buff *small_skb;
573

574 575
	ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
		       wr_id, wc->status);
576 577

	if (unlikely(wr_id >= ipoib_recvq_size)) {
578
		if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
M
Michael S. Tsirkin 已提交
579 580 581
			spin_lock_irqsave(&priv->lock, flags);
			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
			ipoib_cm_start_rx_drain(priv);
582
			queue_work(priv->wq, &priv->cm.rx_reap_task);
M
Michael S. Tsirkin 已提交
583 584 585 586
			spin_unlock_irqrestore(&priv->lock, flags);
		} else
			ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
				   wr_id, ipoib_recvq_size);
587 588 589
		return;
	}

590 591 592 593 594 595
	p = wc->qp->qp_context;

	has_srq = ipoib_cm_has_srq(dev);
	rx_ring = has_srq ? priv->cm.srq_ring : p->rx_ring;

	skb = rx_ring[wr_id].skb;
596 597

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
598 599 600
		ipoib_dbg(priv,
			  "cm recv error (status=%d, wrid=%d vend_err %#x)\n",
			  wc->status, wr_id, wc->vendor_err);
601
		++dev->stats.rx_dropped;
602 603 604 605 606 607 608
		if (has_srq)
			goto repost;
		else {
			if (!--p->recv_count) {
				spin_lock_irqsave(&priv->lock, flags);
				list_move(&p->list, &priv->cm.rx_reap_list);
				spin_unlock_irqrestore(&priv->lock, flags);
609
				queue_work(priv->wq, &priv->cm.rx_reap_task);
610 611 612
			}
			return;
		}
613 614
	}

615
	if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
616
		if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
617 618
			spin_lock_irqsave(&priv->lock, flags);
			p->jiffies = jiffies;
M
Michael S. Tsirkin 已提交
619 620 621
			/* Move this entry to list head, but do not re-add it
			 * if it has been moved out of list. */
			if (p->state == IPOIB_CM_RX_LIVE)
622 623 624 625 626
				list_move(&p->list, &priv->cm.passive_ids);
			spin_unlock_irqrestore(&priv->lock, flags);
		}
	}

627 628 629
	if (wc->byte_len < IPOIB_CM_COPYBREAK) {
		int dlen = wc->byte_len;

630
		small_skb = dev_alloc_skb(dlen + IPOIB_CM_RX_RESERVE);
631
		if (small_skb) {
632
			skb_reserve(small_skb, IPOIB_CM_RX_RESERVE);
633 634 635 636 637 638 639 640 641 642 643
			ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
						   dlen, DMA_FROM_DEVICE);
			skb_copy_from_linear_data(skb, small_skb->data, dlen);
			ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0],
						      dlen, DMA_FROM_DEVICE);
			skb_put(small_skb, dlen);
			skb = small_skb;
			goto copied;
		}
	}

644 645 646
	frags = PAGE_ALIGN(wc->byte_len -
			   min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) /
		PAGE_SIZE;
647

648 649
	newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
				       mapping, GFP_ATOMIC);
650
	if (unlikely(!newskb)) {
651 652 653 654 655
		/*
		 * If we can't allocate a new RX buffer, dump
		 * this packet and reuse the old buffer.
		 */
		ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
656
		++dev->stats.rx_dropped;
657 658 659
		goto repost;
	}

660 661
	ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
	memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
662 663 664 665

	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
		       wc->byte_len, wc->slid);

666
	skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
667

668
copied:
669
	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
670
	skb_add_pseudo_hdr(skb);
671

672 673
	++dev->stats.rx_packets;
	dev->stats.rx_bytes += skb->len;
674 675 676 677

	skb->dev = dev;
	/* XXX get correct PACKET_ type here */
	skb->pkt_type = PACKET_HOST;
R
Roland Dreier 已提交
678
	netif_receive_skb(skb);
679 680

repost:
681 682 683 684 685
	if (has_srq) {
		if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id)))
			ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
				   "for buf %d\n", wr_id);
	} else {
686 687 688 689
		if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p,
							  &priv->cm.rx_wr,
							  priv->cm.rx_sge,
							  wr_id))) {
690 691 692 693 694
			--p->recv_count;
			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
				   "for buf %d\n", wr_id);
		}
	}
695 696 697 698 699
}

static inline int post_send(struct ipoib_dev_priv *priv,
			    struct ipoib_cm_tx *tx,
			    unsigned int wr_id,
700
			    struct ipoib_tx_buf *tx_req)
701 702 703
{
	struct ib_send_wr *bad_wr;

704
	ipoib_build_sge(priv, tx_req);
705

C
Christoph Hellwig 已提交
706
	priv->tx_wr.wr.wr_id	= wr_id | IPOIB_OP_CM;
707

C
Christoph Hellwig 已提交
708
	return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr);
709 710 711 712
}

void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
{
713
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
714
	struct ipoib_tx_buf *tx_req;
715
	int rc;
716
	unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
717 718 719 720

	if (unlikely(skb->len > tx->mtu)) {
		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
			   skb->len, tx->mtu);
721 722
		++dev->stats.tx_dropped;
		++dev->stats.tx_errors;
723
		ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
724 725
		return;
	}
726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742
	if (skb_shinfo(skb)->nr_frags > usable_sge) {
		if (skb_linearize(skb) < 0) {
			ipoib_warn(priv, "skb could not be linearized\n");
			++dev->stats.tx_dropped;
			++dev->stats.tx_errors;
			dev_kfree_skb_any(skb);
			return;
		}
		/* Does skb_linearize return ok without reducing nr_frags? */
		if (skb_shinfo(skb)->nr_frags > usable_sge) {
			ipoib_warn(priv, "too many frags after skb linearize\n");
			++dev->stats.tx_dropped;
			++dev->stats.tx_errors;
			dev_kfree_skb_any(skb);
			return;
		}
	}
743 744 745 746 747 748 749 750 751 752 753 754
	ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
		       tx->tx_head, skb->len, tx->qp->qp_num);

	/*
	 * We put the skb into the tx_ring _before_ we call post_send()
	 * because it's entirely possible that the completion handler will
	 * run before we execute anything after the post_send().  That
	 * means we have to make sure everything is properly recorded and
	 * our state is consistent before we call post_send().
	 */
	tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
	tx_req->skb = skb;
755 756

	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
757
		++dev->stats.tx_errors;
758 759 760 761
		dev_kfree_skb_any(skb);
		return;
	}

E
Erez Shitrit 已提交
762 763 764 765 766 767
	if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size - 1) {
		ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
			  tx->qp->qp_num);
		netif_stop_queue(dev);
	}

768 769 770
	skb_orphan(skb);
	skb_dst_drop(skb);

771 772 773 774
	if (netif_queue_stopped(dev)) {
		rc = ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
				      IB_CQ_REPORT_MISSED_EVENTS);
		if (unlikely(rc < 0))
E
Erez Shitrit 已提交
775
			ipoib_warn(priv, "IPoIB/CM:request notify on send CQ failed\n");
776
		else if (rc)
E
Erez Shitrit 已提交
777
			napi_schedule(&priv->send_napi);
778
	}
E
Erez Shitrit 已提交
779

780
	rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
781
	if (unlikely(rc)) {
E
Erez Shitrit 已提交
782
		ipoib_warn(priv, "IPoIB/CM:post_send failed, error %d\n", rc);
783
		++dev->stats.tx_errors;
784
		ipoib_dma_unmap_tx(priv, tx_req);
785
		dev_kfree_skb_any(skb);
E
Erez Shitrit 已提交
786 787 788

		if (netif_queue_stopped(dev))
			netif_wake_queue(dev);
789
	} else {
790
		netif_trans_update(dev);
791
		++tx->tx_head;
792
		++priv->tx_head;
793 794 795
	}
}

796
void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
797
{
798
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
799 800
	struct ipoib_cm_tx *tx = wc->qp->qp_context;
	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
801
	struct ipoib_tx_buf *tx_req;
802 803
	unsigned long flags;

804 805
	ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
		       wr_id, wc->status);
806 807 808 809 810 811 812 813 814

	if (unlikely(wr_id >= ipoib_sendq_size)) {
		ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
			   wr_id, ipoib_sendq_size);
		return;
	}

	tx_req = &tx->tx_ring[wr_id];

815
	ipoib_dma_unmap_tx(priv, tx_req);
816 817

	/* FIXME: is this right? Shouldn't we only increment on success? */
818 819
	++dev->stats.tx_packets;
	dev->stats.tx_bytes += tx_req->skb->len;
820 821 822

	dev_kfree_skb_any(tx_req->skb);

823 824
	netif_tx_lock(dev);

825
	++tx->tx_tail;
826
	++priv->tx_tail;
E
Erez Shitrit 已提交
827 828 829 830

	if (unlikely(netif_queue_stopped(dev) &&
		     (priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1 &&
		     test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
831 832 833 834 835 836
		netif_wake_queue(dev);

	if (wc->status != IB_WC_SUCCESS &&
	    wc->status != IB_WC_WR_FLUSH_ERR) {
		struct ipoib_neigh *neigh;

837 838 839 840 841 842
		/* IB_WC[_RNR]_RETRY_EXC_ERR error is part of the life cycle,
		 * so don't make waves.
		 */
		if (wc->status == IB_WC_RNR_RETRY_EXC_ERR ||
		    wc->status == IB_WC_RETRY_EXC_ERR)
			ipoib_dbg(priv,
843
				  "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n",
844
				   __func__, wc->status, wr_id, wc->vendor_err);
845
		else
846
			ipoib_warn(priv,
847
				    "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n",
848
				   __func__, wc->status, wr_id, wc->vendor_err);
849

850
		spin_lock_irqsave(&priv->lock, flags);
851 852 853 854
		neigh = tx->neigh;

		if (neigh) {
			neigh->cm = NULL;
855
			ipoib_neigh_free(neigh);
856 857 858 859 860 861

			tx->neigh = NULL;
		}

		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
			list_move(&tx->list, &priv->cm.reap_list);
862
			queue_work(priv->wq, &priv->cm.reap_task);
863 864 865 866
		}

		clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);

867
		spin_unlock_irqrestore(&priv->lock, flags);
868 869
	}

870
	netif_tx_unlock(dev);
871 872 873 874
}

int ipoib_cm_dev_open(struct net_device *dev)
{
875
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
876 877 878 879 880 881 882
	int ret;

	if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
		return 0;

	priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
	if (IS_ERR(priv->cm.id)) {
883
		pr_warn("%s: failed to create CM ID\n", priv->ca->name);
884
		ret = PTR_ERR(priv->cm.id);
M
Michael S. Tsirkin 已提交
885
		goto err_cm;
886 887 888
	}

	ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
H
Haggai Eran 已提交
889
			   0);
890
	if (ret) {
891 892
		pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name,
			IPOIB_CM_IETF_ID | priv->qp->qp_num);
M
Michael S. Tsirkin 已提交
893
		goto err_listen;
894
	}
M
Michael S. Tsirkin 已提交
895

896
	return 0;
M
Michael S. Tsirkin 已提交
897 898 899 900 901 902

err_listen:
	ib_destroy_cm_id(priv->cm.id);
err_cm:
	priv->cm.id = NULL;
	return ret;
903 904
}

905 906
static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
{
907
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
908 909 910 911 912 913 914 915 916 917
	struct ipoib_cm_rx *rx, *n;
	LIST_HEAD(list);

	spin_lock_irq(&priv->lock);
	list_splice_init(&priv->cm.rx_reap_list, &list);
	spin_unlock_irq(&priv->lock);

	list_for_each_entry_safe(rx, n, &list, list) {
		ib_destroy_cm_id(rx->id);
		ib_destroy_qp(rx->qp);
918 919 920 921 922 923
		if (!ipoib_cm_has_srq(dev)) {
			ipoib_cm_free_rx_ring(priv->dev, rx->rx_ring);
			spin_lock_irq(&priv->lock);
			--priv->cm.nonsrq_conn_qp;
			spin_unlock_irq(&priv->lock);
		}
924 925 926 927
		kfree(rx);
	}
}

928 929
void ipoib_cm_dev_stop(struct net_device *dev)
{
930
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
931
	struct ipoib_cm_rx *p;
M
Michael S. Tsirkin 已提交
932 933
	unsigned long begin;
	int ret;
934

935
	if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
936 937 938
		return;

	ib_destroy_cm_id(priv->cm.id);
939
	priv->cm.id = NULL;
M
Michael S. Tsirkin 已提交
940

941
	spin_lock_irq(&priv->lock);
942 943
	while (!list_empty(&priv->cm.passive_ids)) {
		p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
M
Michael S. Tsirkin 已提交
944 945
		list_move(&p->list, &priv->cm.rx_error_list);
		p->state = IPOIB_CM_RX_ERROR;
946
		spin_unlock_irq(&priv->lock);
M
Michael S. Tsirkin 已提交
947 948 949 950 951 952 953 954 955 956 957 958
		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
		if (ret)
			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
		spin_lock_irq(&priv->lock);
	}

	/* Wait for all RX to be drained */
	begin = jiffies;

	while (!list_empty(&priv->cm.rx_error_list) ||
	       !list_empty(&priv->cm.rx_flush_list) ||
	       !list_empty(&priv->cm.rx_drain_list)) {
959
		if (time_after(jiffies, begin + 5 * HZ)) {
M
Michael S. Tsirkin 已提交
960 961 962 963 964
			ipoib_warn(priv, "RX drain timing out\n");

			/*
			 * assume the HW is wedged and just free up everything.
			 */
965 966 967 968 969 970
			list_splice_init(&priv->cm.rx_flush_list,
					 &priv->cm.rx_reap_list);
			list_splice_init(&priv->cm.rx_error_list,
					 &priv->cm.rx_reap_list);
			list_splice_init(&priv->cm.rx_drain_list,
					 &priv->cm.rx_reap_list);
M
Michael S. Tsirkin 已提交
971 972 973
			break;
		}
		spin_unlock_irq(&priv->lock);
974
		usleep_range(1000, 2000);
975
		ipoib_drain_cq(dev);
M
Michael S. Tsirkin 已提交
976 977 978 979 980
		spin_lock_irq(&priv->lock);
	}

	spin_unlock_irq(&priv->lock);

981
	ipoib_cm_free_rx_reap_list(dev);
982 983 984 985 986 987 988

	cancel_delayed_work(&priv->cm.stale_task);
}

/*
 * Handle a CM REP on the active (TX) side: validate the peer's
 * advertised MTU, move our RC QP through RTR -> RTS, re-transmit any
 * packets that queued up on the neigh while the connection was being
 * established, and finally send the RTU.
 *
 * Returns 0 on success or a negative errno; on error the caller
 * (ipoib_cm_tx_handler) sends a consumer-defined REJ.
 */
static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
	struct ipoib_cm_tx *p = cm_id->context;
	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
	struct ipoib_cm_data *data = event->private_data;
	struct sk_buff_head skqueue;
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;
	struct sk_buff *skb;

	p->mtu = be32_to_cpu(data->mtu);

	/* The peer's MTU must leave room for the IPoIB encapsulation. */
	if (p->mtu <= IPOIB_ENCAP_LEN) {
		ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n",
			   p->mtu, IPOIB_ENCAP_LEN);
		return -EINVAL;
	}

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}

	qp_attr.rq_psn = 0 /* FIXME */;
	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}

	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return ret;
	}

	skb_queue_head_init(&skqueue);

	/* Splice the neigh's backlog out under the lock ... */
	spin_lock_irq(&priv->lock);
	set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
	if (p->neigh)
		while ((skb = __skb_dequeue(&p->neigh->queue)))
			__skb_queue_tail(&skqueue, skb);
	spin_unlock_irq(&priv->lock);

	/* ... then re-queue it for transmission without holding it. */
	while ((skb = __skb_dequeue(&skqueue))) {
		skb->dev = p->dev;
		ret = dev_queue_xmit(skb);
		if (ret)
			ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n",
				   __func__, ret);
	}

	ret = ib_send_cm_rtu(cm_id, NULL, 0);
	if (ret) {
		ipoib_warn(priv, "failed to send RTU: %d\n", ret);
		return ret;
	}
	return 0;
}

1055
static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
1056
{
1057
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
1058
	struct ib_qp_init_attr attr = {
E
Erez Shitrit 已提交
1059
		.send_cq		= priv->send_cq,
1060
		.recv_cq		= priv->recv_cq,
1061 1062 1063 1064 1065
		.srq			= priv->cm.srq,
		.cap.max_send_wr	= ipoib_sendq_size,
		.cap.max_send_sge	= 1,
		.sq_sig_type		= IB_SIGNAL_ALL_WR,
		.qp_type		= IB_QPT_RC,
1066
		.qp_context		= tx,
1067
		.create_flags		= 0
1068
	};
1069 1070
	struct ib_qp *tx_qp;

1071
	if (dev->features & NETIF_F_SG)
1072 1073
		attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
					      MAX_SKB_FRAGS + 1);
1074

1075
	tx_qp = ib_create_qp(priv->pd, &attr);
1076
	tx->max_send_sge = attr.cap.max_send_sge;
1077
	return tx_qp;
1078 1079 1080 1081 1082
}

/*
 * Build and send the CM connection request (REQ) for a TX endpoint.
 * Our own datagram QPN and the CM buffer size travel in the private
 * data so the passive side can validate them.  Returns the result of
 * ib_send_cm_req().
 */
static int ipoib_cm_send_req(struct net_device *dev,
			     struct ib_cm_id *id, struct ib_qp *qp,
			     u32 qpn,
			     struct sa_path_rec *pathrec)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_cm_req_param req = {};
	struct ipoib_cm_data data = {
		.qpn = cpu_to_be32(priv->qp->qp_num),
		.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE),
	};

	req.primary_path		= pathrec;
	req.alternate_path		= NULL;
	req.service_id			= cpu_to_be64(IPOIB_CM_IETF_ID | qpn);
	req.qp_num			= qp->qp_num;
	req.qp_type			= qp->qp_type;
	req.private_data		= &data;
	req.private_data_len		= sizeof(data);
	req.flow_control		= 0;
	req.starting_psn		= 0; /* FIXME */

	/*
	 * Pick some arbitrary defaults here; we could make these
	 * module parameters if anyone cared about setting them.
	 */
	req.responder_resources		= 4;
	req.remote_cm_response_timeout	= 20;
	req.local_cm_response_timeout	= 20;
	req.retry_count			= 0; /* RFC draft warns against retries */
	req.rnr_retry_count		= 0; /* RFC draft warns against retries */
	req.max_cm_retries		= 15;
	req.srq				= ipoib_cm_has_srq(dev);

	return ib_send_cm_req(id, &req);
}

/*
 * Move a freshly created TX QP to the INIT state, binding it to our
 * pkey and port as required before the CM transitions it to RTR/RTS.
 * Returns 0 or a negative errno.
 */
static int ipoib_cm_modify_tx_init(struct net_device *dev,
				  struct ib_cm_id *cm_id, struct ib_qp *qp)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;
	/* ib_find_pkey() fills in qp_attr.pkey_index on success. */
	ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
	if (ret) {
		ipoib_warn(priv, "pkey 0x%x not found: %d\n", priv->pkey, ret);
		return ret;
	}

	qp_attr.qp_state = IB_QPS_INIT;
	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
	qp_attr.port_num = priv->port;
	qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;

	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret);
		return ret;
	}
	return 0;
}

/*
 * Set up the active (TX) side of a connected-mode path: allocate the
 * TX ring and RC QP, create a CM id, move the QP to INIT, and send a
 * CM REQ toward the remote @qpn described by @pathrec.
 *
 * The allocations run under memalloc_noio so reclaim cannot recurse
 * into network I/O.  Returns 0 or a negative errno; on failure every
 * partially created resource is torn down via the error labels.
 */
static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
			    struct sa_path_rec *pathrec)
{
	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
	unsigned int noio_flag;
	int ret;

	noio_flag = memalloc_noio_save();
	/* vzalloc() returns zeroed memory; no additional memset needed. */
	p->tx_ring = vzalloc(array_size(ipoib_sendq_size, sizeof(*p->tx_ring)));
	if (!p->tx_ring) {
		memalloc_noio_restore(noio_flag);
		ret = -ENOMEM;
		goto err_tx;
	}

	p->qp = ipoib_cm_create_tx_qp(p->dev, p);
	memalloc_noio_restore(noio_flag);
	if (IS_ERR(p->qp)) {
		ret = PTR_ERR(p->qp);
		ipoib_warn(priv, "failed to create tx qp: %d\n", ret);
		goto err_qp;
	}

	p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p);
	if (IS_ERR(p->id)) {
		ret = PTR_ERR(p->id);
		ipoib_warn(priv, "failed to create tx cm id: %d\n", ret);
		goto err_id;
	}

	ret = ipoib_cm_modify_tx_init(p->dev, p->id,  p->qp);
	if (ret) {
		ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
		goto err_modify_send;
	}

	ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec);
	if (ret) {
		ipoib_warn(priv, "failed to send cm req: %d\n", ret);
		goto err_modify_send;
	}

	ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n",
		  p->qp->qp_num, pathrec->dgid.raw, qpn);

	return 0;

err_modify_send:
	ib_destroy_cm_id(p->id);
err_id:
	p->id = NULL;
	ib_destroy_qp(p->qp);
err_qp:
	p->qp = NULL;
	vfree(p->tx_ring);
err_tx:
	return ret;
}

/*
 * Destroy an active (TX) connection: tear down the CM id, wait up to
 * five seconds for posted sends to complete, reclaim anything still
 * outstanding, then release the QP, the TX ring, and @p itself.
 */
static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
	struct ipoib_tx_buf *tx_req;
	unsigned long begin;

	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
		  p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);

	if (p->id)
		ib_destroy_cm_id(p->id);

	if (p->tx_ring) {
		/* Wait for all sends to complete */
		begin = jiffies;
		while ((int) p->tx_tail - (int) p->tx_head < 0) {
			if (time_after(jiffies, begin + 5 * HZ)) {
				ipoib_warn(priv, "timing out; %d sends not completed\n",
					   p->tx_head - p->tx_tail);
				goto timeout;
			}

			usleep_range(1000, 2000);
		}
	}

timeout:

	/* Reclaim any sends the hardware never completed. */
	while ((int) p->tx_tail - (int) p->tx_head < 0) {
		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
		ipoib_dma_unmap_tx(priv, tx_req);
		dev_kfree_skb_any(tx_req->skb);
		/*
		 * NOTE(review): tx lock presumably serializes tx_tail against
		 * the send-completion path -- confirm against ipoib_ib.c.
		 */
		netif_tx_lock_bh(p->dev);
		++p->tx_tail;
		++priv->tx_tail;
		if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size >> 1) &&
		    netif_queue_stopped(p->dev) &&
		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
			netif_wake_queue(p->dev);
		netif_tx_unlock_bh(p->dev);
	}

	if (p->qp)
		ib_destroy_qp(p->qp);

	vfree(p->tx_ring);
	kfree(p);
}

/*
 * CM event handler for active (TX) connections: acks DREQs, completes
 * establishment on REP (rejecting on failure), and on fatal CM events
 * detaches the neigh and queues the connection for the reap worker.
 * Always returns 0 so the CM core does not destroy the id itself.
 */
static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
			       struct ib_cm_event *event)
{
	struct ipoib_cm_tx *tx = cm_id->context;
	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
	struct net_device *dev = priv->dev;
	struct ipoib_neigh *neigh;
	unsigned long flags;
	int ret;

	switch (event->event) {
	case IB_CM_DREQ_RECEIVED:
		ipoib_dbg(priv, "DREQ received.\n");
		ib_send_cm_drep(cm_id, NULL, 0);
		break;
	case IB_CM_REP_RECEIVED:
		ipoib_dbg(priv, "REP received.\n");
		ret = ipoib_cm_rep_handler(cm_id, event);
		if (ret)
			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
				       NULL, 0, NULL, 0);
		break;
	case IB_CM_REQ_ERROR:
	case IB_CM_REJ_RECEIVED:
	case IB_CM_TIMEWAIT_EXIT:
		ipoib_dbg(priv, "CM error %d.\n", event->event);
		/* Both locks: the neigh is reachable from the xmit path too. */
		netif_tx_lock_bh(dev);
		spin_lock_irqsave(&priv->lock, flags);
		neigh = tx->neigh;

		if (neigh) {
			neigh->cm = NULL;
			ipoib_neigh_free(neigh);

			tx->neigh = NULL;
		}

		/* Only the first teardown path moves tx to the reap list. */
		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
			list_move(&tx->list, &priv->cm.reap_list);
			queue_work(priv->wq, &priv->cm.reap_task);
		}

		spin_unlock_irqrestore(&priv->lock, flags);
		netif_tx_unlock_bh(dev);
		break;
	default:
		break;
	}

	return 0;
}

struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
				       struct ipoib_neigh *neigh)
{
1306
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318
	struct ipoib_cm_tx *tx;

	tx = kzalloc(sizeof *tx, GFP_ATOMIC);
	if (!tx)
		return NULL;

	neigh->cm = tx;
	tx->neigh = neigh;
	tx->path = path;
	tx->dev = dev;
	list_add(&tx->list, &priv->cm.start_list);
	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
1319
	queue_work(priv->wq, &priv->cm.start_task);
1320 1321 1322 1323 1324
	return tx;
}

/*
 * Detach @tx from its neigh and queue it for asynchronous teardown by
 * the reap worker.  Idempotent: only the caller that actually clears
 * IPOIB_FLAG_INITIALIZED does any work.
 */
void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
{
	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
	unsigned long flags;
	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
		spin_lock_irqsave(&priv->lock, flags);
		list_move(&tx->list, &priv->cm.reap_list);
		queue_work(priv->wq, &priv->cm.reap_task);
		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
			  tx->neigh->daddr + 4);
		tx->neigh = NULL;
		spin_unlock_irqrestore(&priv->lock, flags);
	}
}

1338 1339
#define QPN_AND_OPTIONS_OFFSET	4

1340 1341 1342 1343 1344 1345 1346 1347
/*
 * Worker: establish a connection for every entry on cm.start_list.
 *
 * Locking: the list is manipulated under netif_tx_lock + priv->lock,
 * but both are dropped around ipoib_cm_tx_init(), which sleeps.  The
 * path is only guaranteed valid while the locks are held, so its
 * record is copied to the stack before unlocking.
 */
static void ipoib_cm_tx_start(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.start_task);
	struct net_device *dev = priv->dev;
	struct ipoib_neigh *neigh;
	struct ipoib_cm_tx *p;
	unsigned long flags;
	struct ipoib_path *path;
	int ret;

	struct sa_path_rec pathrec;
	u32 qpn;

	netif_tx_lock_bh(dev);
	spin_lock_irqsave(&priv->lock, flags);

	while (!list_empty(&priv->cm.start_list)) {
		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
		list_del_init(&p->list);
		neigh = p->neigh;

		qpn = IPOIB_QPN(neigh->daddr);
		/*
		 * As long as the search is with these 2 locks,
		 * path existence indicates its validity.
		 */
		path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET);
		if (!path) {
			pr_info("%s ignore not valid path %pI6\n",
				__func__,
				neigh->daddr + QPN_AND_OPTIONS_OFFSET);
			goto free_neigh;
		}
		memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);

		spin_unlock_irqrestore(&priv->lock, flags);
		netif_tx_unlock_bh(dev);

		ret = ipoib_cm_tx_init(p, qpn, &pathrec);

		netif_tx_lock_bh(dev);
		spin_lock_irqsave(&priv->lock, flags);

		if (ret) {
			/* Also reached via goto when no valid path exists. */
free_neigh:
			neigh = p->neigh;
			if (neigh) {
				neigh->cm = NULL;
				ipoib_neigh_free(neigh);
			}
			list_del(&p->list);
			kfree(p);
		}
	}

	spin_unlock_irqrestore(&priv->lock, flags);
	netif_tx_unlock_bh(dev);
}

/*
 * Worker: destroy every TX connection queued on cm.reap_list.  Both
 * locks are dropped around ipoib_cm_tx_destroy(), which sleeps, and
 * reacquired before looking at the list again.
 */
static void ipoib_cm_tx_reap(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.reap_task);
	struct net_device *dev = priv->dev;
	struct ipoib_cm_tx *p;
	unsigned long flags;

	netif_tx_lock_bh(dev);
	spin_lock_irqsave(&priv->lock, flags);

	while (!list_empty(&priv->cm.reap_list)) {
		p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
		list_del_init(&p->list);
		spin_unlock_irqrestore(&priv->lock, flags);
		netif_tx_unlock_bh(dev);
		ipoib_cm_tx_destroy(p);
		netif_tx_lock_bh(dev);
		spin_lock_irqsave(&priv->lock, flags);
	}

	spin_unlock_irqrestore(&priv->lock, flags);
	netif_tx_unlock_bh(dev);
}

/*
 * Worker: for each oversized skb queued by ipoib_cm_skb_too_long(),
 * send the appropriate ICMP "fragmentation needed" / ICMPv6 "packet
 * too big" error carrying the multicast MTU, then free the skb.
 */
static void ipoib_cm_skb_reap(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.skb_task);
	struct net_device *dev = priv->dev;
	struct sk_buff *skb;
	unsigned long flags;
	unsigned mtu = priv->mcast_mtu;

	netif_tx_lock_bh(dev);
	spin_lock_irqsave(&priv->lock, flags);

	while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
		/* icmp_send()/icmpv6_send() must not run under our locks. */
		spin_unlock_irqrestore(&priv->lock, flags);
		netif_tx_unlock_bh(dev);

		if (skb->protocol == htons(ETH_P_IP))
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
#endif
		dev_kfree_skb_any(skb);

		netif_tx_lock_bh(dev);
		spin_lock_irqsave(&priv->lock, flags);
	}

	spin_unlock_irqrestore(&priv->lock, flags);
	netif_tx_unlock_bh(dev);
}

1457
void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
1458 1459
			   unsigned int mtu)
{
1460
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
1461 1462
	int e = skb_queue_empty(&priv->cm.skb_queue);

1463
	skb_dst_update_pmtu(skb, mtu);
1464 1465 1466

	skb_queue_tail(&priv->cm.skb_queue, skb);
	if (e)
1467
		queue_work(priv->wq, &priv->cm.skb_task);
1468 1469
}

M
Michael S. Tsirkin 已提交
1470 1471
static void ipoib_cm_rx_reap(struct work_struct *work)
{
1472 1473
	ipoib_cm_free_rx_reap_list(container_of(work, struct ipoib_dev_priv,
						cm.rx_reap_task)->dev);
M
Michael S. Tsirkin 已提交
1474 1475
}

1476 1477 1478 1479 1480
/*
 * Delayed worker: expire idle passive (RX) connections.  passive_ids
 * is kept in LRU order, so the scan starts at the tail and stops at
 * the first entry used within IPOIB_CM_RX_TIMEOUT; stale entries are
 * moved to the error list and their QPs forced to the error state.
 * Re-arms itself while any passive connections remain.
 */
static void ipoib_cm_stale_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   cm.stale_task.work);
	struct ipoib_cm_rx *p;
	int ret;

	spin_lock_irq(&priv->lock);
	while (!list_empty(&priv->cm.passive_ids)) {
		/* List is sorted by LRU, start from tail,
		 * stop when we see a recently used entry */
		p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
			break;
		list_move(&p->list, &priv->cm.rx_error_list);
		p->state = IPOIB_CM_RX_ERROR;
		/* Drop the lock around ib_modify_qp(), which may sleep. */
		spin_unlock_irq(&priv->lock);
		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
		if (ret)
			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
		spin_lock_irq(&priv->lock);
	}

	if (!list_empty(&priv->cm.passive_ids))
		queue_delayed_work(priv->wq,
				   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
	spin_unlock_irq(&priv->lock);
}

1505
static ssize_t show_mode(struct device *d, struct device_attribute *attr,
1506 1507
			 char *buf)
{
1508 1509
	struct net_device *dev = to_net_dev(d);
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
1510 1511 1512 1513 1514 1515 1516

	if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
		return sprintf(buf, "connected\n");
	else
		return sprintf(buf, "datagram\n");
}

1517 1518 1519 1520 1521
/*
 * sysfs store for the "mode" attribute: switches the interface
 * between connected and datagram mode via ipoib_set_mode().
 *
 * Uses trylock + restart_syscall() on both sysfs_mutex and rtnl to
 * avoid deadlocking against concurrent device teardown paths that
 * take the same locks in the opposite order.
 */
static ssize_t set_mode(struct device *d, struct device_attribute *attr,
			const char *buf, size_t count)
{
	struct net_device *dev = to_net_dev(d);
	int ret;
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
		return -EPERM;

	if (!mutex_trylock(&priv->sysfs_mutex))
		return restart_syscall();

	if (!rtnl_trylock()) {
		mutex_unlock(&priv->sysfs_mutex);
		return restart_syscall();
	}

	ret = ipoib_set_mode(dev, buf);

	/* The assumption is that the function ipoib_set_mode returned
	 * with the rtnl held by it, if not the value -EBUSY returned,
	 * then no need to rtnl_unlock
	 */
	if (ret != -EBUSY)
		rtnl_unlock();
	mutex_unlock(&priv->sysfs_mutex);

	/* -EBUSY still counts as success for the writer: consume @count. */
	return (!ret || ret == -EBUSY) ? count : ret;
}

1548
static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
1549 1550 1551 1552 1553 1554

/* Create the per-netdev "mode" sysfs attribute.  Returns 0 or -errno. */
int ipoib_cm_add_mode_attr(struct net_device *dev)
{
	return device_create_file(&dev->dev, &dev_attr_mode);
}

1555
/*
 * Try to create the shared receive queue used by connected mode, plus
 * its matching srq_ring.  SRQs are optional in the verbs API, so on
 * any failure priv->cm.srq is left NULL and callers fall back to the
 * non-SRQ receive path; only unexpected errors are logged.
 */
static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_srq_init_attr srq_init_attr = {
		.srq_type = IB_SRQT_BASIC,
		.attr = {
			.max_wr  = ipoib_recvq_size,
			.max_sge = max_sge
		}
	};

	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
	if (IS_ERR(priv->cm.srq)) {
		/* -ENOSYS just means the device has no SRQ support. */
		if (PTR_ERR(priv->cm.srq) != -ENOSYS)
			pr_warn("%s: failed to allocate SRQ, error %ld\n",
			       priv->ca->name, PTR_ERR(priv->cm.srq));
		priv->cm.srq = NULL;
		return;
	}

	priv->cm.srq_ring = vzalloc(array_size(ipoib_recvq_size,
					       sizeof(*priv->cm.srq_ring)));
	if (!priv->cm.srq_ring) {
		ib_destroy_srq(priv->cm.srq);
		priv->cm.srq = NULL;
		return;
	}

}

/*
 * One-time connected-mode initialization for @dev: set up the CM
 * lists and work items, create the SRQ (when supported) and size the
 * CM MTU/fragment count accordingly, then pre-post receive buffers on
 * the SRQ.  Returns 0 or a negative errno (cleaning up after itself).
 */
int ipoib_cm_dev_init(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int max_srq_sge, i;

	INIT_LIST_HEAD(&priv->cm.passive_ids);
	INIT_LIST_HEAD(&priv->cm.reap_list);
	INIT_LIST_HEAD(&priv->cm.start_list);
	INIT_LIST_HEAD(&priv->cm.rx_error_list);
	INIT_LIST_HEAD(&priv->cm.rx_flush_list);
	INIT_LIST_HEAD(&priv->cm.rx_drain_list);
	INIT_LIST_HEAD(&priv->cm.rx_reap_list);
	INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
	INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
	INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
	INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap);
	INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);

	skb_queue_head_init(&priv->cm.skb_queue);

	ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);

	max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
	ipoib_cm_create_srq(dev, max_srq_sge);
	if (ipoib_cm_has_srq(dev)) {
		/* One page per SGE, minus a small headroom allowance. */
		priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
		priv->cm.num_frags  = max_srq_sge;
		ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
			  priv->cm.max_cm_mtu, priv->cm.num_frags);
	} else {
		priv->cm.max_cm_mtu = IPOIB_CM_MTU;
		priv->cm.num_frags  = IPOIB_CM_RX_SG;
	}

	ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);

	if (ipoib_cm_has_srq(dev)) {
		for (i = 0; i < ipoib_recvq_size; ++i) {
			if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
						   priv->cm.num_frags - 1,
						   priv->cm.srq_ring[i].mapping,
						   GFP_KERNEL)) {
				ipoib_warn(priv, "failed to allocate "
					   "receive buffer %d\n", i);
				ipoib_cm_dev_cleanup(dev);
				return -ENOMEM;
			}

			if (ipoib_cm_post_receive_srq(dev, i)) {
				ipoib_warn(priv, "ipoib_cm_post_receive_srq "
					   "failed for buf %d\n", i);
				ipoib_cm_dev_cleanup(dev);
				return -EIO;
			}
		}
	}

	/* Advertise connected-mode capability in the HW address. */
	priv->dev->dev_addr[0] = IPOIB_FLAGS_RC;
	return 0;
}

/*
 * Undo ipoib_cm_dev_init(): destroy the SRQ and free its receive
 * ring.  A no-op when connected mode never created an SRQ.
 */
void ipoib_cm_dev_cleanup(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int ret;

	if (!priv->cm.srq)
		return;

	ipoib_dbg(priv, "Cleanup ipoib connected mode.\n");

	ret = ib_destroy_srq(priv->cm.srq);
	if (ret)
		ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret);

	priv->cm.srq = NULL;
	if (!priv->cm.srq_ring)
		return;

	ipoib_cm_free_rx_ring(dev, priv->cm.srq_ring);
	priv->cm.srq_ring = NULL;
}