qp.c 22.1 KB
Newer Older
M
Mike Marciniszyn 已提交
1
/*
J
Jubin John 已提交
2
 * Copyright(c) 2015, 2016 Intel Corporation.
M
Mike Marciniszyn 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/hash.h>
#include <linux/module.h>
#include <linux/seq_file.h>
53 54
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
55
#include <rdma/ib_verbs.h>
M
Mike Marciniszyn 已提交
56 57 58 59

#include "hfi.h"
#include "qp.h"
#include "trace.h"
60
#include "verbs_txreq.h"
M
Mike Marciniszyn 已提交
61

62
unsigned int hfi1_qp_table_size = 256;
M
Mike Marciniszyn 已提交
63 64 65
module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

66
static void flush_tx_list(struct rvt_qp *qp);
M
Mike Marciniszyn 已提交
67 68 69 70
static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait *wait,
	struct sdma_txreq *stx,
71 72
	unsigned int seq,
	bool pkts_sent);
M
Mike Marciniszyn 已提交
73
static void iowait_wakeup(struct iowait *wait, int reason);
74
static void iowait_sdma_drained(struct iowait *wait);
75
static void qp_pio_drain(struct rvt_qp *qp);
M
Mike Marciniszyn 已提交
76

77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_RDMA_READ] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC,
},

[IB_WR_ATOMIC_CMP_AND_SWP] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_ATOMIC_FETCH_AND_ADD] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_RDMA_WRITE_WITH_IMM] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
		       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND_WITH_IMM] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
		       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
[IB_WR_REG_MR] = {
	.length = sizeof(struct ib_reg_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_LOCAL,
},

[IB_WR_LOCAL_INV] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_LOCAL,
},

[IB_WR_SEND_WITH_INV] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_RC),
},

135 136
};

137
static void flush_tx_list(struct rvt_qp *qp)
M
Mike Marciniszyn 已提交
138
{
139 140 141
	struct hfi1_qp_priv *priv = qp->priv;

	while (!list_empty(&priv->s_iowait.tx_head)) {
M
Mike Marciniszyn 已提交
142 143 144
		struct sdma_txreq *tx;

		tx = list_first_entry(
145
			&priv->s_iowait.tx_head,
M
Mike Marciniszyn 已提交
146 147 148 149 150 151 152 153
			struct sdma_txreq,
			list);
		list_del_init(&tx->list);
		hfi1_put_txreq(
			container_of(tx, struct verbs_txreq, txreq));
	}
}

154
static void flush_iowait(struct rvt_qp *qp)
M
Mike Marciniszyn 已提交
155
{
156
	struct hfi1_qp_priv *priv = qp->priv;
M
Mike Marciniszyn 已提交
157
	unsigned long flags;
158
	seqlock_t *lock = priv->s_iowait.lock;
M
Mike Marciniszyn 已提交
159

160 161 162
	if (!lock)
		return;
	write_seqlock_irqsave(lock, flags);
163 164
	if (!list_empty(&priv->s_iowait.list)) {
		list_del_init(&priv->s_iowait.list);
165
		priv->s_iowait.lock = NULL;
166
		rvt_put_qp(qp);
M
Mike Marciniszyn 已提交
167
	}
168
	write_sequnlock_irqrestore(lock, flags);
M
Mike Marciniszyn 已提交
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
}

/*
 * Translate an OPA-specific MTU enum value to its size in bytes.
 * Returns -1 for any value other than OPA_MTU_8192 / OPA_MTU_10240.
 */
static inline int opa_mtu_enum_to_int(int mtu)
{
	if (mtu == OPA_MTU_8192)
		return 8192;
	if (mtu == OPA_MTU_10240)
		return 10240;
	return -1;
}

/**
 * This function is what we would push to the core layer if we wanted to be a
 * "first class citizen".  Instead we hide this here and rely on Verbs ULPs
 * to blindly pass the MTU enum value from the PathRecord to us.
 */
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
187
	int val;
M
Mike Marciniszyn 已提交
188

189 190 191 192
	/* Constraining 10KB packets to 8KB packets */
	if (mtu == (enum ib_mtu)OPA_MTU_10240)
		mtu = OPA_MTU_8192;
	val = opa_mtu_enum_to_int((int)mtu);
M
Mike Marciniszyn 已提交
193 194 195 196 197
	if (val > 0)
		return val;
	return ib_mtu_enum_to_int(mtu);
}

198 199
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
			 int attr_mask, struct ib_udata *udata)
M
Mike Marciniszyn 已提交
200
{
201
	struct ib_qp *ibqp = &qp->ibqp;
M
Mike Marciniszyn 已提交
202
	struct hfi1_ibdev *dev = to_idev(ibqp->device);
203
	struct hfi1_devdata *dd = dd_from_dev(dev);
204
	u8 sc;
M
Mike Marciniszyn 已提交
205 206

	if (attr_mask & IB_QP_AV) {
207
		sc = ah_to_sc(ibqp->device, &attr->ah_attr);
I
Ira Weiny 已提交
208 209 210
		if (sc == 0xf)
			return -EINVAL;

211 212
		if (!qp_to_sdma_engine(qp, sc) &&
		    dd->flags & HFI1_HAS_SEND_DMA)
213
			return -EINVAL;
214 215 216

		if (!qp_to_send_context(qp, sc))
			return -EINVAL;
M
Mike Marciniszyn 已提交
217 218 219
	}

	if (attr_mask & IB_QP_ALT_PATH) {
220
		sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
I
Ira Weiny 已提交
221 222 223
		if (sc == 0xf)
			return -EINVAL;

224 225
		if (!qp_to_sdma_engine(qp, sc) &&
		    dd->flags & HFI1_HAS_SEND_DMA)
226
			return -EINVAL;
227 228 229

		if (!qp_to_send_context(qp, sc))
			return -EINVAL;
M
Mike Marciniszyn 已提交
230 231
	}

232 233
	return 0;
}
M
Mike Marciniszyn 已提交
234

235 236 237 238 239
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
		    int attr_mask, struct ib_udata *udata)
{
	struct ib_qp *ibqp = &qp->ibqp;
	struct hfi1_qp_priv *priv = qp->priv;
M
Mike Marciniszyn 已提交
240 241

	if (attr_mask & IB_QP_AV) {
242 243
		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
244
		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
M
Mike Marciniszyn 已提交
245 246
	}

247 248 249 250 251 252
	if (attr_mask & IB_QP_PATH_MIG_STATE &&
	    attr->path_mig_state == IB_MIG_MIGRATED &&
	    qp->s_mig_state == IB_MIG_ARMED) {
		qp->s_flags |= RVT_S_AHG_CLEAR;
		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
253
		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
M
Mike Marciniszyn 已提交
254 255 256
	}
}

257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
/**
 * hfi1_check_send_wqe - validate wqe
 * @qp - The qp
 * @wqe - The built wqe
 *
 * validate wqe.  This is called
 * prior to inserting the wqe into
 * the ring but after the wqe has been
 * setup.
 *
 * Returns 0 on success, -EINVAL on failure
 *
 */
int hfi1_check_send_wqe(struct rvt_qp *qp,
			struct rvt_swqe *wqe)
I
Ira Weiny 已提交
272 273
{
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
274
	struct rvt_ah *ah;
I
Ira Weiny 已提交
275

276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
	case IB_QPT_UC:
		if (wqe->length > 0x80000000U)
			return -EINVAL;
		break;
	case IB_QPT_SMI:
		ah = ibah_to_rvtah(wqe->ud_wr.ah);
		if (wqe->length > (1 << ah->log_pmtu))
			return -EINVAL;
		break;
	case IB_QPT_GSI:
	case IB_QPT_UD:
		ah = ibah_to_rvtah(wqe->ud_wr.ah);
		if (wqe->length > (1 << ah->log_pmtu))
			return -EINVAL;
292
		if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
293 294 295
			return -EINVAL;
	default:
		break;
I
Ira Weiny 已提交
296
	}
297
	return wqe->length <= piothreshold;
I
Ira Weiny 已提交
298 299
}

300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
/**
 * _hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress w/o regard to the s_flags.
 *
 * It is only used in the post send, which doesn't hold
 * the s_lock.
 *
 * The work is queued on the port's hfi1_wq.  The target cpu is the one
 * of the QP's SDMA engine when it has one, otherwise the first cpu of
 * the device's NUMA node.
 */
void _hfi1_schedule_send(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	int cpu;

	if (priv->s_sde)
		cpu = priv->s_sde->cpu;
	else
		cpu = cpumask_first(cpumask_of_node(dd->node));

	iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, cpu);
}

323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341
/*
 * Wait until the QP has no PIO sends pending.
 *
 * Does nothing when the QP has no send context.  Otherwise, for as long
 * as iowait_pio_pending() reports pending work, the loop calls
 * hfi1_sc_wantpiobuf_intr(..., 1), waits in iowait_pio_drain(), and then
 * calls hfi1_sc_wantpiobuf_intr(..., 0).
 */
static void qp_pio_drain(struct rvt_qp *qp)
{
	struct hfi1_ibdev *dev;
	struct hfi1_qp_priv *priv = qp->priv;

	/* No send context: nothing can be pending on it. */
	if (!priv->s_sendcontext)
		return;
	dev = to_idev(qp->ibqp.device);
	while (iowait_pio_pending(&priv->s_iowait)) {
		/*
		 * Each hfi1_sc_wantpiobuf_intr() call is made with the
		 * device iowait_lock write-held; the lock is dropped
		 * around the iowait_pio_drain() wait itself.
		 * NOTE(review): presumably the 1/0 argument turns the PIO
		 * buffer-available interrupt on/off - confirm in pio.c.
		 */
		write_seqlock_irq(&dev->iowait_lock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
		write_sequnlock_irq(&dev->iowait_lock);
		iowait_pio_drain(&priv->s_iowait);
		write_seqlock_irq(&dev->iowait_lock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
		write_sequnlock_irq(&dev->iowait_lock);
	}
}

342 343 344 345 346 347 348 349 350
/**
 * hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress and caller should hold
 * the s_lock.
 */
void hfi1_schedule_send(struct rvt_qp *qp)
{
351
	lockdep_assert_held(&qp->s_lock);
352 353 354 355
	if (hfi1_send_ok(qp))
		_hfi1_schedule_send(qp);
}

356
/*
 * Wake a QP that was waiting on @flag.
 *
 * Under the s_lock: if any bit of @flag is set in s_flags, clear those
 * bits, emit the wakeup trace event and schedule the send engine.  A QP
 * reference is dropped in every case.
 */
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&qp->s_lock, irq_flags);
	if (qp->s_flags & flag) {
		qp->s_flags &= ~flag;
		trace_hfi1_qpwakeup(qp, flag);
		hfi1_schedule_send(qp);
	}
	spin_unlock_irqrestore(&qp->s_lock, irq_flags);

	/* Notify hfi1_destroy_qp() if it is waiting. */
	rvt_put_qp(qp);
}

static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait *wait,
	struct sdma_txreq *stx,
375 376
	uint seq,
	bool pkts_sent)
M
Mike Marciniszyn 已提交
377 378
{
	struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
379
	struct rvt_qp *qp;
380
	struct hfi1_qp_priv *priv;
M
Mike Marciniszyn 已提交
381 382 383 384 385
	unsigned long flags;
	int ret = 0;
	struct hfi1_ibdev *dev;

	qp = tx->qp;
386
	priv = qp->priv;
M
Mike Marciniszyn 已提交
387 388

	spin_lock_irqsave(&qp->s_lock, flags);
389
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
M
Mike Marciniszyn 已提交
390 391 392 393 394 395 396 397 398 399 400
		/*
		 * If we couldn't queue the DMA request, save the info
		 * and try again later rather than destroying the
		 * buffer and undoing the side effects of the copy.
		 */
		/* Make a common routine? */
		dev = &sde->dd->verbs_dev;
		list_add_tail(&stx->list, &wait->tx_head);
		write_seqlock(&dev->iowait_lock);
		if (sdma_progress(sde, seq, stx))
			goto eagain;
401
		if (list_empty(&priv->s_iowait.list)) {
M
Mike Marciniszyn 已提交
402 403 404
			struct hfi1_ibport *ibp =
				to_iport(qp->ibqp.device, qp->port_num);

405
			ibp->rvp.n_dmawait++;
406
			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
407 408
			iowait_queue(pkts_sent, &priv->s_iowait,
				     &sde->dmawait);
409
			priv->s_iowait.lock = &dev->iowait_lock;
410
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
411
			rvt_get_qp(qp);
M
Mike Marciniszyn 已提交
412 413
		}
		write_sequnlock(&dev->iowait_lock);
414
		qp->s_flags &= ~RVT_S_BUSY;
M
Mike Marciniszyn 已提交
415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ret = -EBUSY;
	} else {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		hfi1_put_txreq(tx);
	}
	return ret;
eagain:
	write_sequnlock(&dev->iowait_lock);
	spin_unlock_irqrestore(&qp->s_lock, flags);
	list_del_init(&stx->list);
	return -EAGAIN;
}

static void iowait_wakeup(struct iowait *wait, int reason)
{
431
	struct rvt_qp *qp = iowait_to_qp(wait);
M
Mike Marciniszyn 已提交
432 433

	WARN_ON(reason != SDMA_AVAIL_REASON);
434
	hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
M
Mike Marciniszyn 已提交
435 436
}

437 438 439
static void iowait_sdma_drained(struct iowait *wait)
{
	struct rvt_qp *qp = iowait_to_qp(wait);
440
	unsigned long flags;
441 442 443 444 445 446 447

	/*
	 * This happens when the send engine notes
	 * a QP in the error state and cannot
	 * do the flush work until that QP's
	 * sdma work has finished.
	 */
448
	spin_lock_irqsave(&qp->s_lock, flags);
449 450 451 452
	if (qp->s_flags & RVT_S_WAIT_DMA) {
		qp->s_flags &= ~RVT_S_WAIT_DMA;
		hfi1_schedule_send(qp);
	}
453
	spin_unlock_irqrestore(&qp->s_lock, flags);
454 455
}

M
Mike Marciniszyn 已提交
456 457 458 459 460 461 462 463 464
/**
 *
 * qp_to_sdma_engine - map a qp to a send engine
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send engine for the qp or NULL for SMI type qp.
 */
465
struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
M
Mike Marciniszyn 已提交
466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct sdma_engine *sde;

	if (!(dd->flags & HFI1_HAS_SEND_DMA))
		return NULL;
	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		return NULL;
	default:
		break;
	}
	sde = sdma_select_engine_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, sc5);
	return sde;
}

482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
/*
 * qp_to_send_context - map a qp to a send context
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send context for the qp.  SMI qps always get the context of
 * dd->vld[15]; all others are selected from the qp number and @sc5.
 */
struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

	/* SMA packets to VL15 */
	if (qp->ibqp.qp_type == IB_QPT_SMI)
		return dd->vld[15].sc;

	return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
					  sc5);
}

M
Mike Marciniszyn 已提交
506 507
/* Cursor used by qp_iter_init()/qp_iter_next()/qp_iter_print(). */
struct qp_iter {
	struct hfi1_ibdev *dev;	/* device whose QPs are being walked */
	struct rvt_qp *qp;	/* QP found by the last qp_iter_next() */
	int specials;		/* number of special QP slots: 2 per port */
	int n;			/* index of the slot/bucket holding @qp */
};

struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
{
	struct qp_iter *iter;

	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return NULL;

	iter->dev = dev;
522
	iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
M
Mike Marciniszyn 已提交
523 524 525 526 527 528 529 530 531

	return iter;
}

int qp_iter_next(struct qp_iter *iter)
{
	struct hfi1_ibdev *dev = iter->dev;
	int n = iter->n;
	int ret = 1;
532 533
	struct rvt_qp *pqp = iter->qp;
	struct rvt_qp *qp;
M
Mike Marciniszyn 已提交
534 535 536 537 538 539 540 541 542 543 544

	/*
	 * The approach is to consider the special qps
	 * as an additional table entries before the
	 * real hash table.  Since the qp code sets
	 * the qp->next hash link to NULL, this works just fine.
	 *
	 * iter->specials is 2 * # ports
	 *
	 * n = 0..iter->specials is the special qp indices
	 *
545
	 * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials are
M
Mike Marciniszyn 已提交
546 547 548
	 * the potential hash bucket entries
	 *
	 */
549
	for (; n <  dev->rdi.qp_dev->qp_table_size + iter->specials; n++) {
M
Mike Marciniszyn 已提交
550 551 552 553 554 555 556 557
		if (pqp) {
			qp = rcu_dereference(pqp->next);
		} else {
			if (n < iter->specials) {
				struct hfi1_pportdata *ppd;
				struct hfi1_ibport *ibp;
				int pidx;

558
				pidx = n % dev->rdi.ibdev.phys_port_cnt;
M
Mike Marciniszyn 已提交
559 560 561 562
				ppd = &dd_from_dev(dev)->pport[pidx];
				ibp = &ppd->ibport_data;

				if (!(n & 1))
563
					qp = rcu_dereference(ibp->rvp.qp[0]);
M
Mike Marciniszyn 已提交
564
				else
565
					qp = rcu_dereference(ibp->rvp.qp[1]);
M
Mike Marciniszyn 已提交
566 567
			} else {
				qp = rcu_dereference(
568
					dev->rdi.qp_dev->qp_table[
M
Mike Marciniszyn 已提交
569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585
						(n - iter->specials)]);
			}
		}
		pqp = qp;
		if (qp) {
			iter->qp = qp;
			iter->n = n;
			return 0;
		}
	}
	return ret;
}

static const char * const qp_type_str[] = {
	"SMI", "GSI", "RC", "UC", "UD",
};

586
static int qp_idle(struct rvt_qp *qp)
M
Mike Marciniszyn 已提交
587 588 589 590 591 592 593 594 595 596
{
	return
		qp->s_last == qp->s_acked &&
		qp->s_acked == qp->s_cur &&
		qp->s_cur == qp->s_tail &&
		qp->s_tail == qp->s_head;
}

void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
{
597 598
	struct rvt_swqe *wqe;
	struct rvt_qp *qp = iter->qp;
599
	struct hfi1_qp_priv *priv = qp->priv;
M
Mike Marciniszyn 已提交
600
	struct sdma_engine *sde;
601
	struct send_context *send_context;
M
Mike Marciniszyn 已提交
602

603
	sde = qp_to_sdma_engine(qp, priv->s_sc);
604
	wqe = rvt_get_swqe_ptr(qp, qp->s_last);
605
	send_context = qp_to_send_context(qp, priv->s_sc);
M
Mike Marciniszyn 已提交
606
	seq_printf(s,
607
		   "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
M
Mike Marciniszyn 已提交
608 609 610 611 612 613 614 615 616
		   iter->n,
		   qp_idle(qp) ? "I" : "B",
		   qp->ibqp.qp_num,
		   atomic_read(&qp->refcount),
		   qp_type_str[qp->ibqp.qp_type],
		   qp->state,
		   wqe ? wqe->wr.opcode : 0,
		   qp->s_hdrwords,
		   qp->s_flags,
617 618
		   iowait_sdma_pending(&priv->s_iowait),
		   iowait_pio_pending(&priv->s_iowait),
619
		   !list_empty(&priv->s_iowait.list),
M
Mike Marciniszyn 已提交
620 621 622 623 624 625
		   qp->timeout,
		   wqe ? wqe->ssn : 0,
		   qp->s_lsn,
		   qp->s_last_psn,
		   qp->s_psn, qp->s_next_psn,
		   qp->s_sending_psn, qp->s_sending_hpsn,
626
		   qp->r_psn,
M
Mike Marciniszyn 已提交
627 628
		   qp->s_last, qp->s_acked, qp->s_cur,
		   qp->s_tail, qp->s_head, qp->s_size,
629
		   qp->s_avail,
630 631 632 633
		   /* ack_queue ring pointers, size */
		   qp->s_tail_ack_queue, qp->r_head_ack_queue,
		   HFI1_MAX_RDMA_ATOMIC,
		   /* remote QP info  */
M
Mike Marciniszyn 已提交
634
		   qp->remote_qpn,
635 636
		   rdma_ah_get_dlid(&qp->remote_ah_attr),
		   rdma_ah_get_sl(&qp->remote_ah_attr),
M
Mike Marciniszyn 已提交
637
		   qp->pmtu,
638
		   qp->s_retry,
M
Mike Marciniszyn 已提交
639 640
		   qp->s_retry_cnt,
		   qp->s_rnr_retry_cnt,
641
		   qp->s_rnr_retry,
M
Mike Marciniszyn 已提交
642
		   sde,
643
		   sde ? sde->this_idx : 0,
644
		   send_context,
645 646
		   send_context ? send_context->sw_index : 0,
		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
647 648
		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
		   qp->pid);
M
Mike Marciniszyn 已提交
649 650
}

651
void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
652 653 654
{
	struct hfi1_qp_priv *priv;

655
	priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node);
656 657 658 659 660
	if (!priv)
		return ERR_PTR(-ENOMEM);

	priv->owner = qp;

661
	priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL,
662 663
				   rdi->dparms.node);
	if (!priv->s_ahg) {
664 665 666
		kfree(priv);
		return ERR_PTR(-ENOMEM);
	}
667 668 669 670 671 672 673
	iowait_init(
		&priv->s_iowait,
		1,
		_hfi1_do_send,
		iowait_sleep,
		iowait_wakeup,
		iowait_sdma_drained);
674 675 676 677 678 679 680
	return priv;
}

void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

681
	kfree(priv->s_ahg);
682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709
	kfree(priv);
}

/*
 * Count the special QPs (rvp.qp[0] and rvp.qp[1]) that are still set on
 * the ports of the device.  Despite the name, nothing is freed here.
 *
 * Returns the number of special QP pointers that are non-NULL.
 */
unsigned free_all_qps(struct rvt_dev_info *rdi)
{
	struct hfi1_ibdev *verbs_dev =
		container_of(rdi, struct hfi1_ibdev, rdi);
	struct hfi1_devdata *dd =
		container_of(verbs_dev, struct hfi1_devdata, verbs_dev);
	unsigned in_use = 0;
	int port;

	for (port = 0; port < dd->num_pports; port++) {
		struct hfi1_ibport *ibp = &dd->pport[port].ibport_data;
		int idx;

		rcu_read_lock();
		for (idx = 0; idx < 2; idx++) {
			if (rcu_dereference(ibp->rvp.qp[idx]))
				in_use++;
		}
		rcu_read_unlock();
	}

	return in_use;
}

710 711
void flush_qp_waiters(struct rvt_qp *qp)
{
712
	lockdep_assert_held(&qp->s_lock);
713 714 715 716 717 718 719 720 721 722 723 724 725 726 727
	flush_iowait(qp);
}

void stop_send_queue(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	cancel_work_sync(&priv->s_iowait.iowork);
}

/*
 * Quiesce the QP's send side: drain outstanding SDMA work, then
 * outstanding PIO work, and finally release any tx requests still
 * saved on the iowait tx list.
 */
void quiesce_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *qpriv = qp->priv;

	iowait_sdma_drain(&qpriv->s_iowait);
	qp_pio_drain(qp);
	flush_tx_list(qp);
}

732 733
void notify_qp_reset(struct rvt_qp *qp)
{
734
	qp->r_adefered = 0;
735 736 737
	clear_ahg(qp);
}

738 739 740 741
/*
 * Switch to alternate path.
 * The QP s_lock should be held and interrupts disabled.
 */
742
void hfi1_migrate_qp(struct rvt_qp *qp)
743
{
744
	struct hfi1_qp_priv *priv = qp->priv;
745 746 747 748
	struct ib_event ev;

	qp->s_mig_state = IB_MIG_MIGRATED;
	qp->remote_ah_attr = qp->alt_ah_attr;
749
	qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
750
	qp->s_pkey_index = qp->s_alt_pkey_index;
751
	qp->s_flags |= RVT_S_AHG_CLEAR;
752 753
	priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
754 755 756 757 758 759

	ev.device = qp->ibqp.device;
	ev.element.qp = &qp->ibqp;
	ev.event = IB_EVENT_PATH_MIG;
	qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}
760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778

/*
 * Convert an MTU in bytes to its path MTU enum value via mtu_to_enum(),
 * passing OPA_MTU_8192 as the default.
 */
int mtu_to_path_mtu(u32 mtu)
{
	int path_mtu = mtu_to_enum(mtu, OPA_MTU_8192);

	return path_mtu;
}

/*
 * Compute the MTU in bytes to use for @qp given the path MTU enum @pmtu.
 *
 * The enum is converted to bytes.  If the VL that the QP's SL maps to
 * (SL -> SC -> VL) is below PER_VL_SEND_CONTEXTS, the result is capped
 * to that VL's MTU.
 */
u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
{
	struct hfi1_ibdev *verbs_dev =
		container_of(rdi, struct hfi1_ibdev, rdi);
	struct hfi1_devdata *dd =
		container_of(verbs_dev, struct hfi1_devdata, verbs_dev);
	struct hfi1_ibport *ibp = &dd->pport[qp->port_num - 1].ibport_data;
	u8 sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
	u8 vl = sc_to_vlt(dd, sc);
	u32 mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);

	if (vl < PER_VL_SEND_CONTEXTS)
		mtu = min_t(u32, mtu, dd->vld[vl].mtu);

	return mtu;
}

/*
 * Pick the path MTU enum for @qp from the modify-QP attributes.
 *
 * Returns -1 when attr->path_mtu does not convert to a byte count.
 * When the requested MTU exceeds the port's ibmtu, the enum for the
 * port's ibmtu is returned (IB_MTU_2048 is the mtu_to_enum() default);
 * otherwise attr->path_mtu is returned unchanged.
 */
int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
		       struct ib_qp_attr *attr)
{
	struct hfi1_ibdev *verbs_dev =
		container_of(rdi, struct hfi1_ibdev, rdi);
	struct hfi1_devdata *dd =
		container_of(verbs_dev, struct hfi1_devdata, verbs_dev);
	int pidx = qp->port_num - 1;
	int mtu;

	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);
	if (mtu == -1)
		return -1; /* values less than 0 are error */

	if (mtu > dd->pport[pidx].ibmtu)
		return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);

	return attr->path_mtu;
}

void notify_error_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
811
	seqlock_t *lock = priv->s_iowait.lock;
812

813 814 815 816 817 818 819 820 821 822
	if (lock) {
		write_seqlock(lock);
		if (!list_empty(&priv->s_iowait.list) &&
		    !(qp->s_flags & RVT_S_BUSY)) {
			qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
			list_del_init(&priv->s_iowait.list);
			priv->s_iowait.lock = NULL;
			rvt_put_qp(qp);
		}
		write_sequnlock(lock);
823 824 825 826 827 828 829 830 831 832 833 834
	}

	if (!(qp->s_flags & RVT_S_BUSY)) {
		qp->s_hdrwords = 0;
		if (qp->s_rdma_mr) {
			rvt_put_mr(qp->s_rdma_mr);
			qp->s_rdma_mr = NULL;
		}
		flush_tx_list(qp);
	}
}

835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861
/**
 * hfi1_error_port_qps - put a port's RC/UC qps into error state
 * @ibp: the ibport.
 * @sl: the service level.
 *
 * This function places all RC/UC qps with a given service level into error
 * state. It is generally called to force upper layer apps to abandon stale
 * qps after an sl->sc mapping change.
 *
 * Only qps whose current state allows posting sends are affected.  An
 * IB_EVENT_QP_LAST_WQE_REACHED event is delivered when rvt_error_qp()
 * returns non-zero.
 */
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
{
	struct rvt_qp *qp = NULL;
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;
	struct ib_event ev;
	int lastwqe;
	int n;

	rcu_read_lock();

	/* Deal only with RC/UC qps that use the given SL. */
	for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) {
		for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp;
		     qp = rcu_dereference(qp->next)) {
			if (qp->port_num != ppd->port)
				continue;
			if (qp->ibqp.qp_type != IB_QPT_UC &&
			    qp->ibqp.qp_type != IB_QPT_RC)
				continue;
			if (rdma_ah_get_sl(&qp->remote_ah_attr) != sl)
				continue;
			if (!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))
				continue;

			/* Lock order: r_lock, then s_hlock, then s_lock. */
			spin_lock_irq(&qp->r_lock);
			spin_lock(&qp->s_hlock);
			spin_lock(&qp->s_lock);
			lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
			spin_unlock(&qp->s_lock);
			spin_unlock(&qp->s_hlock);
			spin_unlock_irq(&qp->r_lock);

			if (!lastwqe)
				continue;

			ev.device = qp->ibqp.device;
			ev.element.qp = &qp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
		}
	}

	rcu_read_unlock();
}