/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/hash.h>
#include <linux/module.h>
#include <linux/seq_file.h>
53 54
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
55
#include <rdma/ib_verbs.h>
M
Mike Marciniszyn 已提交
56 57 58 59

#include "hfi.h"
#include "qp.h"
#include "trace.h"
60
#include "verbs_txreq.h"
M
Mike Marciniszyn 已提交
61

62
/*
 * QP table size, defaulting to 256.  Exposed as the "qp_table_size"
 * module parameter with read-only (S_IRUGO) permissions.
 */
unsigned int hfi1_qp_table_size = 256;
module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

66
/* Forward declarations of file-local helpers defined further down. */
static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(struct sdma_engine *sde, struct iowait *wait,
			struct sdma_txreq *stx, unsigned seq);
static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);
M
Mike Marciniszyn 已提交
75

76 77
static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
			      struct rvt_qpn_map *map, unsigned off)
M
Mike Marciniszyn 已提交
78
{
79
	return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
M
Mike Marciniszyn 已提交
80 81
}

82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_RDMA_READ] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC,
},

[IB_WR_ATOMIC_CMP_AND_SWP] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_ATOMIC_FETCH_AND_ADD] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_RDMA_WRITE_WITH_IMM] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
		       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND_WITH_IMM] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
		       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
[IB_WR_REG_MR] = {
	.length = sizeof(struct ib_reg_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_LOCAL,
},

[IB_WR_LOCAL_INV] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_LOCAL,
},

[IB_WR_SEND_WITH_INV] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_RC),
},

140 141
};

142
static void flush_tx_list(struct rvt_qp *qp)
M
Mike Marciniszyn 已提交
143
{
144 145 146
	struct hfi1_qp_priv *priv = qp->priv;

	while (!list_empty(&priv->s_iowait.tx_head)) {
M
Mike Marciniszyn 已提交
147 148 149
		struct sdma_txreq *tx;

		tx = list_first_entry(
150
			&priv->s_iowait.tx_head,
M
Mike Marciniszyn 已提交
151 152 153 154 155 156 157 158
			struct sdma_txreq,
			list);
		list_del_init(&tx->list);
		hfi1_put_txreq(
			container_of(tx, struct verbs_txreq, txreq));
	}
}

159
static void flush_iowait(struct rvt_qp *qp)
M
Mike Marciniszyn 已提交
160
{
161
	struct hfi1_qp_priv *priv = qp->priv;
M
Mike Marciniszyn 已提交
162
	unsigned long flags;
163
	seqlock_t *lock = priv->s_iowait.lock;
M
Mike Marciniszyn 已提交
164

165 166 167
	if (!lock)
		return;
	write_seqlock_irqsave(lock, flags);
168 169
	if (!list_empty(&priv->s_iowait.list)) {
		list_del_init(&priv->s_iowait.list);
170
		priv->s_iowait.lock = NULL;
171
		rvt_put_qp(qp);
M
Mike Marciniszyn 已提交
172
	}
173
	write_sequnlock_irqrestore(lock, flags);
M
Mike Marciniszyn 已提交
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
}

/*
 * opa_mtu_enum_to_int - convert an OPA-specific MTU enum to bytes
 * @mtu: the MTU enum value
 *
 * Return: 8192 for OPA_MTU_8192, 10240 for OPA_MTU_10240, and -1 for
 * any other value.
 */
static inline int opa_mtu_enum_to_int(int mtu)
{
	if (mtu == OPA_MTU_8192)
		return 8192;
	if (mtu == OPA_MTU_10240)
		return 10240;
	return -1;
}

/**
 * This function is what we would push to the core layer if we wanted to be a
 * "first class citizen".  Instead we hide this here and rely on Verbs ULPs
 * to blindly pass the MTU enum value from the PathRecord to us.
 */
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
192
	int val;
M
Mike Marciniszyn 已提交
193

194 195 196 197
	/* Constraining 10KB packets to 8KB packets */
	if (mtu == (enum ib_mtu)OPA_MTU_10240)
		mtu = OPA_MTU_8192;
	val = opa_mtu_enum_to_int((int)mtu);
M
Mike Marciniszyn 已提交
198 199 200 201 202
	if (val > 0)
		return val;
	return ib_mtu_enum_to_int(mtu);
}

203 204
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
			 int attr_mask, struct ib_udata *udata)
M
Mike Marciniszyn 已提交
205
{
206
	struct ib_qp *ibqp = &qp->ibqp;
M
Mike Marciniszyn 已提交
207
	struct hfi1_ibdev *dev = to_idev(ibqp->device);
208
	struct hfi1_devdata *dd = dd_from_dev(dev);
209
	u8 sc;
M
Mike Marciniszyn 已提交
210 211

	if (attr_mask & IB_QP_AV) {
212
		sc = ah_to_sc(ibqp->device, &attr->ah_attr);
I
Ira Weiny 已提交
213 214 215
		if (sc == 0xf)
			return -EINVAL;

216 217
		if (!qp_to_sdma_engine(qp, sc) &&
		    dd->flags & HFI1_HAS_SEND_DMA)
218
			return -EINVAL;
219 220 221

		if (!qp_to_send_context(qp, sc))
			return -EINVAL;
M
Mike Marciniszyn 已提交
222 223 224
	}

	if (attr_mask & IB_QP_ALT_PATH) {
225
		sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
I
Ira Weiny 已提交
226 227 228
		if (sc == 0xf)
			return -EINVAL;

229 230
		if (!qp_to_sdma_engine(qp, sc) &&
		    dd->flags & HFI1_HAS_SEND_DMA)
231
			return -EINVAL;
232 233 234

		if (!qp_to_send_context(qp, sc))
			return -EINVAL;
M
Mike Marciniszyn 已提交
235 236
	}

237 238
	return 0;
}
M
Mike Marciniszyn 已提交
239

240 241 242 243 244
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
		    int attr_mask, struct ib_udata *udata)
{
	struct ib_qp *ibqp = &qp->ibqp;
	struct hfi1_qp_priv *priv = qp->priv;
M
Mike Marciniszyn 已提交
245 246

	if (attr_mask & IB_QP_AV) {
247 248
		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
249
		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
M
Mike Marciniszyn 已提交
250 251
	}

252 253 254 255 256 257
	if (attr_mask & IB_QP_PATH_MIG_STATE &&
	    attr->path_mig_state == IB_MIG_MIGRATED &&
	    qp->s_mig_state == IB_MIG_ARMED) {
		qp->s_flags |= RVT_S_AHG_CLEAR;
		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
258
		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
M
Mike Marciniszyn 已提交
259 260 261
	}
}

262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
/**
 * hfi1_check_send_wqe - validate wqe
 * @qp - The qp
 * @wqe - The built wqe
 *
 * validate wqe.  This is called
 * prior to inserting the wqe into
 * the ring but after the wqe has been
 * setup.
 *
 * Returns 0 on success, -EINVAL on failure
 *
 */
int hfi1_check_send_wqe(struct rvt_qp *qp,
			struct rvt_swqe *wqe)
I
Ira Weiny 已提交
277 278
{
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
279
	struct rvt_ah *ah;
I
Ira Weiny 已提交
280

281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
	case IB_QPT_UC:
		if (wqe->length > 0x80000000U)
			return -EINVAL;
		break;
	case IB_QPT_SMI:
		ah = ibah_to_rvtah(wqe->ud_wr.ah);
		if (wqe->length > (1 << ah->log_pmtu))
			return -EINVAL;
		break;
	case IB_QPT_GSI:
	case IB_QPT_UD:
		ah = ibah_to_rvtah(wqe->ud_wr.ah);
		if (wqe->length > (1 << ah->log_pmtu))
			return -EINVAL;
		if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
			return -EINVAL;
	default:
		break;
I
Ira Weiny 已提交
301
	}
302
	return wqe->length <= piothreshold;
I
Ira Weiny 已提交
303 304
}

305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
/**
 * _hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress w/o regard to the s_flags.
 *
 * It is only used in the post send, which doesn't hold
 * the s_lock.
 *
 * The work is queued on the port's hfi1_wq, on the CPU of the QP's SDMA
 * engine when it has one, otherwise on the first CPU of the device's
 * node.
 */
void _hfi1_schedule_send(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	int cpu;

	if (priv->s_sde)
		cpu = priv->s_sde->cpu;
	else
		cpu = cpumask_first(cpumask_of_node(dd->node));

	iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, cpu);
}

328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
/*
 * qp_pio_drain - wait until the QP has no PIO sends pending
 * @qp: the QP
 *
 * Returns immediately when the QP has no send context.  Otherwise, for
 * as long as iowait_pio_pending() is true, the loop calls
 * hfi1_sc_wantpiobuf_intr() with 1 before iowait_pio_drain() and with 0
 * after it, taking dev->iowait_lock around each of those two calls.
 */
static void qp_pio_drain(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibdev *ibdev;

	if (!priv->s_sendcontext)
		return;

	ibdev = to_idev(qp->ibqp.device);
	while (iowait_pio_pending(&priv->s_iowait)) {
		write_seqlock_irq(&ibdev->iowait_lock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
		write_sequnlock_irq(&ibdev->iowait_lock);

		iowait_pio_drain(&priv->s_iowait);

		write_seqlock_irq(&ibdev->iowait_lock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
		write_sequnlock_irq(&ibdev->iowait_lock);
	}
}

347 348 349 350 351 352 353 354 355
/**
 * hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress and caller should hold
 * the s_lock.
 */
void hfi1_schedule_send(struct rvt_qp *qp)
{
356
	lockdep_assert_held(&qp->s_lock);
357 358 359 360
	if (hfi1_send_ok(qp))
		_hfi1_schedule_send(qp);
}

361
/*
 * hfi1_qp_wakeup - clear a wait flag and reschedule the QP
 * @qp: the QP
 * @flag: the wait flag(s) to clear from qp->s_flags
 *
 * If any bit of @flag is set in qp->s_flags it is cleared under s_lock
 * and the send engine is rescheduled.  One QP reference is dropped in
 * every case.
 * NOTE(review): that reference is presumably the one taken when the QP
 * was put on a wait list - confirm against the callers.
 */
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&qp->s_lock, irq_flags);
	if (qp->s_flags & flag) {
		qp->s_flags &= ~flag;
		trace_hfi1_qpwakeup(qp, flag);
		hfi1_schedule_send(qp);
	}
	spin_unlock_irqrestore(&qp->s_lock, irq_flags);

	/* Notify hfi1_destroy_qp() if it is waiting. */
	rvt_put_qp(qp);
}

static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait *wait,
	struct sdma_txreq *stx,
	unsigned seq)
{
	struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
383
	struct rvt_qp *qp;
384
	struct hfi1_qp_priv *priv;
M
Mike Marciniszyn 已提交
385 386 387 388 389
	unsigned long flags;
	int ret = 0;
	struct hfi1_ibdev *dev;

	qp = tx->qp;
390
	priv = qp->priv;
M
Mike Marciniszyn 已提交
391 392

	spin_lock_irqsave(&qp->s_lock, flags);
393
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
M
Mike Marciniszyn 已提交
394 395 396 397 398 399 400 401 402 403 404
		/*
		 * If we couldn't queue the DMA request, save the info
		 * and try again later rather than destroying the
		 * buffer and undoing the side effects of the copy.
		 */
		/* Make a common routine? */
		dev = &sde->dd->verbs_dev;
		list_add_tail(&stx->list, &wait->tx_head);
		write_seqlock(&dev->iowait_lock);
		if (sdma_progress(sde, seq, stx))
			goto eagain;
405
		if (list_empty(&priv->s_iowait.list)) {
M
Mike Marciniszyn 已提交
406 407 408
			struct hfi1_ibport *ibp =
				to_iport(qp->ibqp.device, qp->port_num);

409
			ibp->rvp.n_dmawait++;
410
			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
411
			list_add_tail(&priv->s_iowait.list, &sde->dmawait);
412
			priv->s_iowait.lock = &dev->iowait_lock;
413
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
414
			rvt_get_qp(qp);
M
Mike Marciniszyn 已提交
415 416
		}
		write_sequnlock(&dev->iowait_lock);
417
		qp->s_flags &= ~RVT_S_BUSY;
M
Mike Marciniszyn 已提交
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ret = -EBUSY;
	} else {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		hfi1_put_txreq(tx);
	}
	return ret;
eagain:
	write_sequnlock(&dev->iowait_lock);
	spin_unlock_irqrestore(&qp->s_lock, flags);
	list_del_init(&stx->list);
	return -EAGAIN;
}

static void iowait_wakeup(struct iowait *wait, int reason)
{
434
	struct rvt_qp *qp = iowait_to_qp(wait);
M
Mike Marciniszyn 已提交
435 436

	WARN_ON(reason != SDMA_AVAIL_REASON);
437
	hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
M
Mike Marciniszyn 已提交
438 439
}

440 441 442
static void iowait_sdma_drained(struct iowait *wait)
{
	struct rvt_qp *qp = iowait_to_qp(wait);
443
	unsigned long flags;
444 445 446 447 448 449 450

	/*
	 * This happens when the send engine notes
	 * a QP in the error state and cannot
	 * do the flush work until that QP's
	 * sdma work has finished.
	 */
451
	spin_lock_irqsave(&qp->s_lock, flags);
452 453 454 455
	if (qp->s_flags & RVT_S_WAIT_DMA) {
		qp->s_flags &= ~RVT_S_WAIT_DMA;
		hfi1_schedule_send(qp);
	}
456
	spin_unlock_irqrestore(&qp->s_lock, flags);
457 458
}

M
Mike Marciniszyn 已提交
459 460 461 462 463 464 465 466 467
/**
 *
 * qp_to_sdma_engine - map a qp to a send engine
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send engine for the qp or NULL for SMI type qp.
 */
468
struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
M
Mike Marciniszyn 已提交
469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct sdma_engine *sde;

	if (!(dd->flags & HFI1_HAS_SEND_DMA))
		return NULL;
	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		return NULL;
	default:
		break;
	}
	sde = sdma_select_engine_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, sc5);
	return sde;
}

485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508
/*
 * qp_to_send_context - map a qp to a send context
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * The VL15 send context for an SMI qp, otherwise the context chosen by
 * pio_select_send_context_sc().
 */
struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

	/* SMA packets to VL15 */
	if (qp->ibqp.qp_type == IB_QPT_SMI)
		return dd->vld[15].sc;

	return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
					  sc5);
}

M
Mike Marciniszyn 已提交
509 510
/* Cursor used by qp_iter_init()/qp_iter_next()/qp_iter_print(). */
struct qp_iter {
	struct hfi1_ibdev *dev;	/* device being walked */
	struct rvt_qp *qp;	/* QP the cursor currently points at */
	int specials;		/* number of special QP slots: 2 per port */
	int n;			/* current slot: specials first, then buckets */
};

struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
{
	struct qp_iter *iter;

	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return NULL;

	iter->dev = dev;
525
	iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
M
Mike Marciniszyn 已提交
526 527 528 529 530 531 532 533 534

	return iter;
}

int qp_iter_next(struct qp_iter *iter)
{
	struct hfi1_ibdev *dev = iter->dev;
	int n = iter->n;
	int ret = 1;
535 536
	struct rvt_qp *pqp = iter->qp;
	struct rvt_qp *qp;
M
Mike Marciniszyn 已提交
537 538 539 540 541 542 543 544 545 546 547

	/*
	 * The approach is to consider the special qps
	 * as an additional table entries before the
	 * real hash table.  Since the qp code sets
	 * the qp->next hash link to NULL, this works just fine.
	 *
	 * iter->specials is 2 * # ports
	 *
	 * n = 0..iter->specials is the special qp indices
	 *
548
	 * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials are
M
Mike Marciniszyn 已提交
549 550 551
	 * the potential hash bucket entries
	 *
	 */
552
	for (; n <  dev->rdi.qp_dev->qp_table_size + iter->specials; n++) {
M
Mike Marciniszyn 已提交
553 554 555 556 557 558 559 560
		if (pqp) {
			qp = rcu_dereference(pqp->next);
		} else {
			if (n < iter->specials) {
				struct hfi1_pportdata *ppd;
				struct hfi1_ibport *ibp;
				int pidx;

561
				pidx = n % dev->rdi.ibdev.phys_port_cnt;
M
Mike Marciniszyn 已提交
562 563 564 565
				ppd = &dd_from_dev(dev)->pport[pidx];
				ibp = &ppd->ibport_data;

				if (!(n & 1))
566
					qp = rcu_dereference(ibp->rvp.qp[0]);
M
Mike Marciniszyn 已提交
567
				else
568
					qp = rcu_dereference(ibp->rvp.qp[1]);
M
Mike Marciniszyn 已提交
569 570
			} else {
				qp = rcu_dereference(
571
					dev->rdi.qp_dev->qp_table[
M
Mike Marciniszyn 已提交
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588
						(n - iter->specials)]);
			}
		}
		pqp = qp;
		if (qp) {
			iter->qp = qp;
			iter->n = n;
			return 0;
		}
	}
	return ret;
}

/* Short QP type names; qp_iter_print() indexes this by qp->ibqp.qp_type. */
static const char * const qp_type_str[] = {
	"SMI",
	"GSI",
	"RC",
	"UC",
	"UD",
};

589
static int qp_idle(struct rvt_qp *qp)
M
Mike Marciniszyn 已提交
590 591 592 593 594 595 596 597 598 599
{
	return
		qp->s_last == qp->s_acked &&
		qp->s_acked == qp->s_cur &&
		qp->s_cur == qp->s_tail &&
		qp->s_tail == qp->s_head;
}

void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
{
600 601
	struct rvt_swqe *wqe;
	struct rvt_qp *qp = iter->qp;
602
	struct hfi1_qp_priv *priv = qp->priv;
M
Mike Marciniszyn 已提交
603
	struct sdma_engine *sde;
604
	struct send_context *send_context;
M
Mike Marciniszyn 已提交
605

606
	sde = qp_to_sdma_engine(qp, priv->s_sc);
607
	wqe = rvt_get_swqe_ptr(qp, qp->s_last);
608
	send_context = qp_to_send_context(qp, priv->s_sc);
M
Mike Marciniszyn 已提交
609
	seq_printf(s,
610
		   "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
M
Mike Marciniszyn 已提交
611 612 613 614 615 616 617 618 619
		   iter->n,
		   qp_idle(qp) ? "I" : "B",
		   qp->ibqp.qp_num,
		   atomic_read(&qp->refcount),
		   qp_type_str[qp->ibqp.qp_type],
		   qp->state,
		   wqe ? wqe->wr.opcode : 0,
		   qp->s_hdrwords,
		   qp->s_flags,
620 621
		   iowait_sdma_pending(&priv->s_iowait),
		   iowait_pio_pending(&priv->s_iowait),
622
		   !list_empty(&priv->s_iowait.list),
M
Mike Marciniszyn 已提交
623 624 625 626 627 628
		   qp->timeout,
		   wqe ? wqe->ssn : 0,
		   qp->s_lsn,
		   qp->s_last_psn,
		   qp->s_psn, qp->s_next_psn,
		   qp->s_sending_psn, qp->s_sending_hpsn,
629
		   qp->r_psn,
M
Mike Marciniszyn 已提交
630 631
		   qp->s_last, qp->s_acked, qp->s_cur,
		   qp->s_tail, qp->s_head, qp->s_size,
632
		   qp->s_avail,
M
Mike Marciniszyn 已提交
633 634 635 636
		   qp->remote_qpn,
		   qp->remote_ah_attr.dlid,
		   qp->remote_ah_attr.sl,
		   qp->pmtu,
637
		   qp->s_retry,
M
Mike Marciniszyn 已提交
638 639
		   qp->s_retry_cnt,
		   qp->s_rnr_retry_cnt,
640
		   qp->s_rnr_retry,
M
Mike Marciniszyn 已提交
641
		   sde,
642
		   sde ? sde->this_idx : 0,
643
		   send_context,
644 645
		   send_context ? send_context->sw_index : 0,
		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
646 647
		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
		   qp->pid);
M
Mike Marciniszyn 已提交
648 649
}

650 651 652 653 654
/*
 * qp_priv_alloc - allocate the driver-private part of a QP
 * @rdi: the rdmavt device info; supplies the NUMA node
 * @qp: the QP that will own the private data
 * @gfp: allocation flags
 *
 * Allocates the zeroed private structure and its s_ahg member on
 * rdi->dparms.node, records @qp as owner and initializes the iowait
 * with this file's sleep/wakeup/drained callbacks.
 *
 * Return: the private structure, or ERR_PTR(-ENOMEM) if either
 * allocation fails.
 */
void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
		    gfp_t gfp)
{
	struct hfi1_qp_priv *priv;

	priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node);
	if (!priv)
		return ERR_PTR(-ENOMEM);

	priv->owner = qp;
	priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp,
				   rdi->dparms.node);
	if (!priv->s_ahg)
		goto free_priv;

	iowait_init(&priv->s_iowait, 1, _hfi1_do_send, iowait_sleep,
		    iowait_wakeup, iowait_sdma_drained);
	return priv;

free_priv:
	kfree(priv);
	return ERR_PTR(-ENOMEM);
}

void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

681
	kfree(priv->s_ahg);
682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709
	kfree(priv);
}

/*
 * free_all_qps - count the special QPs still installed on the ports
 * @rdi: the rdmavt device info embedded in the hfi1 device
 *
 * Despite the name, nothing is freed here: the function only checks
 * the two qp[] slots of every port under rcu_read_lock().
 *
 * Return: the number of non-NULL special QP slots.
 */
unsigned free_all_qps(struct rvt_dev_info *rdi)
{
	struct hfi1_ibdev *verbs_dev = container_of(rdi,
						    struct hfi1_ibdev,
						    rdi);
	struct hfi1_devdata *dd = container_of(verbs_dev,
					       struct hfi1_devdata,
					       verbs_dev);
	unsigned qp_inuse = 0;
	int port;

	for (port = 0; port < dd->num_pports; port++) {
		struct hfi1_ibport *ibp = &dd->pport[port].ibport_data;
		int slot;

		rcu_read_lock();
		for (slot = 0; slot < 2; slot++) {
			if (rcu_dereference(ibp->rvp.qp[slot]))
				qp_inuse++;
		}
		rcu_read_unlock();
	}

	return qp_inuse;
}

710 711
void flush_qp_waiters(struct rvt_qp *qp)
{
712
	lockdep_assert_held(&qp->s_lock);
713 714 715 716 717 718 719 720 721 722 723 724 725 726 727
	flush_iowait(qp);
}

/*
 * stop_send_queue - cancel the QP's send work item
 * @qp: the QP
 *
 * Calls cancel_work_sync() on the iowait's work item.
 */
void stop_send_queue(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *qpriv = qp->priv;

	cancel_work_sync(&qpriv->s_iowait.iowork);
}

/*
 * quiesce_qp - drain outstanding sends and discard queued requests
 * @qp: the QP
 *
 * Drains SDMA first, then PIO, then releases whatever is still on the
 * iowait's tx list.
 */
void quiesce_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *qpriv = qp->priv;

	iowait_sdma_drain(&qpriv->s_iowait);
	qp_pio_drain(qp);
	flush_tx_list(qp);
}

732 733 734 735 736 737 738 739
/*
 * notify_qp_reset - reset driver-private state of a QP
 * @qp: the QP
 *
 * Zeroes priv->r_adefered and calls clear_ahg().
 */
void notify_qp_reset(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *qpriv = qp->priv;

	qpriv->r_adefered = 0;
	clear_ahg(qp);
}

740 741 742 743
/*
 * Switch to alternate path.
 * The QP s_lock should be held and interrupts disabled.
 */
744
void hfi1_migrate_qp(struct rvt_qp *qp)
745
{
746
	struct hfi1_qp_priv *priv = qp->priv;
747 748 749 750 751 752
	struct ib_event ev;

	qp->s_mig_state = IB_MIG_MIGRATED;
	qp->remote_ah_attr = qp->alt_ah_attr;
	qp->port_num = qp->alt_ah_attr.port_num;
	qp->s_pkey_index = qp->s_alt_pkey_index;
753
	qp->s_flags |= RVT_S_AHG_CLEAR;
754 755
	priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
756 757 758 759 760 761

	ev.device = qp->ibqp.device;
	ev.element.qp = &qp->ibqp;
	ev.event = IB_EVENT_PATH_MIG;
	qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}
762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812

/*
 * mtu_to_path_mtu - convert an MTU in bytes to a path MTU enum
 * @mtu: the MTU in bytes
 *
 * Return: the result of mtu_to_enum() with OPA_MTU_8192 as its second
 * argument.
 */
int mtu_to_path_mtu(u32 mtu)
{
	int path_mtu = mtu_to_enum(mtu, OPA_MTU_8192);

	return path_mtu;
}

/*
 * mtu_from_qp - compute the MTU in bytes to use for a QP
 * @rdi: the rdmavt device info embedded in the hfi1 device
 * @qp: the QP; port_num and remote_ah_attr.sl select the VL
 * @pmtu: the path MTU enum
 *
 * Return: @pmtu converted to bytes, limited to the MTU of the VL the
 * QP's SL maps to when that VL is below PER_VL_SEND_CONTEXTS.
 */
u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
{
	struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
	struct hfi1_devdata *dd = container_of(ibdev, struct hfi1_devdata,
					       verbs_dev);
	struct hfi1_ibport *ibp = &dd->pport[qp->port_num - 1].ibport_data;
	u8 sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
	u8 vl = sc_to_vlt(dd, sc);
	u32 mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);

	if (vl < PER_VL_SEND_CONTEXTS)
		mtu = min_t(u32, mtu, dd->vld[vl].mtu);
	return mtu;
}

/*
 * get_pmtu_from_attr - pick the path MTU enum for a modify request
 * @rdi: the rdmavt device info embedded in the hfi1 device
 * @qp: the QP; port_num selects the port
 * @attr: the attributes carrying the requested path_mtu
 *
 * Return: -1 when the requested enum cannot be converted to bytes; the
 * enum for the port's ibmtu when the request exceeds it; otherwise the
 * requested attr->path_mtu unchanged.
 */
int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
		       struct ib_qp_attr *attr)
{
	struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
	struct hfi1_devdata *dd = container_of(ibdev, struct hfi1_devdata,
					       verbs_dev);
	int pidx = qp->port_num - 1;
	int mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);

	if (mtu == -1)
		return -1; /* values less than 0 are error */

	if (mtu > dd->pport[pidx].ibmtu)
		return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);

	return attr->path_mtu;
}

void notify_error_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
813
	seqlock_t *lock = priv->s_iowait.lock;
814

815 816 817 818 819 820 821 822 823 824
	if (lock) {
		write_seqlock(lock);
		if (!list_empty(&priv->s_iowait.list) &&
		    !(qp->s_flags & RVT_S_BUSY)) {
			qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
			list_del_init(&priv->s_iowait.list);
			priv->s_iowait.lock = NULL;
			rvt_put_qp(qp);
		}
		write_sequnlock(lock);
825 826 827 828 829 830 831 832 833 834 835 836
	}

	if (!(qp->s_flags & RVT_S_BUSY)) {
		qp->s_hdrwords = 0;
		if (qp->s_rdma_mr) {
			rvt_put_mr(qp->s_rdma_mr);
			qp->s_rdma_mr = NULL;
		}
		flush_tx_list(qp);
	}
}

837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888
/**
 * hfi1_error_port_qps - put a port's RC/UC qps into error state
 * @ibp: the ibport.
 * @sl: the service level.
 *
 * This function places all RC/UC qps with a given service level into error
 * state. It is generally called to force upper lay apps to abandon stale qps
 * after an sl->sc mapping change.
 *
 * The hash table is walked under rcu_read_lock().  Each matching QP is
 * errored with r_lock, s_hlock and s_lock taken in that order, and an
 * IB_EVENT_QP_LAST_WQE_REACHED event is raised when rvt_error_qp()
 * returns non-zero.
 */
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
{
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;
	struct rvt_qp *qp = NULL;
	struct ib_event ev;
	int lastwqe;
	int n;

	rcu_read_lock();

	/* Deal only with RC/UC qps that use the given SL. */
	for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) {
		for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp;
		     qp = rcu_dereference(qp->next)) {
			if (qp->port_num != ppd->port)
				continue;
			if (qp->ibqp.qp_type != IB_QPT_UC &&
			    qp->ibqp.qp_type != IB_QPT_RC)
				continue;
			if (qp->remote_ah_attr.sl != sl)
				continue;
			if (!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))
				continue;

			spin_lock_irq(&qp->r_lock);
			spin_lock(&qp->s_hlock);
			spin_lock(&qp->s_lock);
			lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
			spin_unlock(&qp->s_lock);
			spin_unlock(&qp->s_hlock);
			spin_unlock_irq(&qp->r_lock);

			if (!lastwqe)
				continue;

			ev.device = qp->ibqp.device;
			ev.element.qp = &qp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
		}
	}

	rcu_read_unlock();
}