/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/spinlock.h>

#include "ipath_verbs.h"
#include "ipath_kernel.h"

/*
 * Convert the AETH RNR timeout code into the number of milliseconds.
 */
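/*
 * The entries are the RNR NAK timer values defined by the IB spec for
 * each 5-bit AETH code (e.g. code 0 is 655.36 ms, code 0x1F is
 * 491.52 ms), rounded up to whole milliseconds.
 */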
const u32 ib_ipath_rnr_table[32] = {
	656,			/* 0 */
	1,			/* 1 */
	1,			/* 2 */
	1,			/* 3 */
	1,			/* 4 */
	1,			/* 5 */
	1,			/* 6 */
	1,			/* 7 */
	1,			/* 8 */
	1,			/* 9 */
	1,			/* A */
	1,			/* B */
	1,			/* C */
	1,			/* D */
	2,			/* E */
	2,			/* F */
	3,			/* 10 */
	4,			/* 11 */
	6,			/* 12 */
	8,			/* 13 */
	11,			/* 14 */
	16,			/* 15 */
	21,			/* 16 */
	31,			/* 17 */
	41,			/* 18 */
	62,			/* 19 */
	82,			/* 1A */
	123,			/* 1B */
	164,			/* 1C */
	246,			/* 1D */
	328,			/* 1E */
	492			/* 1F */
};

/**
 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
 * @qp: the QP
 *
 * Called with the QP s_lock held and interrupts disabled.
 * XXX Use a simple list for now.  We might need a priority
 * queue if we have lots of QPs waiting for RNR timeouts
 * but that should be rare.
 */
void ipath_insert_rnr_queue(struct ipath_qp *qp)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);

	/* We already did a spin_lock_irqsave(), so just use spin_lock */
	spin_lock(&dev->pending_lock);
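	/*
	 * Timeouts on the rnrwait list are stored as deltas: each QP's
	 * s_rnr_timeout is relative to the entries ahead of it, so the
	 * loop below subtracts the preceding timeouts from the new entry
	 * and adjusts the entry that ends up following it.
	 */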
	if (list_empty(&dev->rnrwait))
		list_add(&qp->timerwait, &dev->rnrwait);
	else {
		struct list_head *l = &dev->rnrwait;
		struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
						  timerwait);

		while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
			qp->s_rnr_timeout -= nqp->s_rnr_timeout;
			l = l->next;
			if (l->next == &dev->rnrwait) {
				nqp = NULL;
				break;
			}
			nqp = list_entry(l->next, struct ipath_qp,
					 timerwait);
		}
		if (nqp)
			nqp->s_rnr_timeout -= qp->s_rnr_timeout;
		list_add(&qp->timerwait, l);
	}
	spin_unlock(&dev->pending_lock);
}

/**
 * ipath_init_sge - Validate a RWQE and fill in the SGE state
 * @qp: the QP
 * @wqe: the receive work request to validate
 * @lengthp: set to the total length of the valid SGEs
 * @ss: the SGE state to fill in from the RWQE
 *
 * Return 1 if OK.
 */
int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
		   u32 *lengthp, struct ipath_sge_state *ss)
{
	int i, j, ret;
	struct ib_wc wc;

	*lengthp = 0;
	for (i = j = 0; i < wqe->num_sge; i++) {
		if (wqe->sg_list[i].length == 0)
			continue;
		/* Check LKEY */
		if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
				   &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
			goto bad_lkey;
		*lengthp += wqe->sg_list[i].length;
		j++;
	}
	ss->num_sge = j;
	ret = 1;
	goto bail;

bad_lkey:
	memset(&wc, 0, sizeof(wc));
	wc.wr_id = wqe->wr_id;
	wc.status = IB_WC_LOC_PROT_ERR;
	wc.opcode = IB_WC_RECV;
	wc.qp = &qp->ibqp;
	/* Signal solicited completion event. */
	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
	ret = 0;
bail:
	return ret;
}

/**
 * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
 * @qp: the QP
 * @wr_id_only: update wr_id only, not SGEs
 *
 * Return 0 if no RWQE is available, otherwise return 1.
 *
 * Can be called from interrupt level.
 */
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
{
	unsigned long flags;
	struct ipath_rq *rq;
	struct ipath_rwq *wq;
	struct ipath_srq *srq;
	struct ipath_rwqe *wqe;
	void (*handler)(struct ib_event *, void *);
	u32 tail;
	int ret;

	qp->r_sge.sg_list = qp->r_sg_list;

	if (qp->ibqp.srq) {
		srq = to_isrq(qp->ibqp.srq);
		handler = srq->ibsrq.event_handler;
		rq = &srq->rq;
	} else {
		srq = NULL;
		handler = NULL;
		rq = &qp->r_rq;
	}

	spin_lock_irqsave(&rq->lock, flags);
	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
		ret = 0;
		goto unlock;
	}

	wq = rq->wq;
	tail = wq->tail;
	/* Validate tail before using it since it is user writable. */
	if (tail >= rq->size)
		tail = 0;
	do {
		if (unlikely(tail == wq->head)) {
			ret = 0;
			goto unlock;
		}
		/* Make sure entry is read after head index is read. */
		smp_rmb();
		wqe = get_rwqe_ptr(rq, tail);
		if (++tail >= rq->size)
			tail = 0;
	} while (!wr_id_only && !ipath_init_sge(qp, wqe, &qp->r_len,
						&qp->r_sge));
	qp->r_wr_id = wqe->wr_id;
	wq->tail = tail;

	ret = 1;
	set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
	if (handler) {
		u32 n;

		/*
		 * validate head pointer value and compute
		 * the number of remaining WQEs.
		 */
		n = wq->head;
		if (n >= rq->size)
			n = 0;
		if (n < tail)
			n += rq->size - tail;
		else
			n -= tail;
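		/*
		 * If the number of posted receive WQEs has dropped below
		 * the SRQ limit, disarm the limit and generate the
		 * asynchronous IB_EVENT_SRQ_LIMIT_REACHED event.
		 */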
		if (n < srq->limit) {
			struct ib_event ev;

			srq->limit = 0;
			spin_unlock_irqrestore(&rq->lock, flags);
			ev.device = qp->ibqp.device;
			ev.element.srq = qp->ibqp.srq;
			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
			handler(&ev, srq->ibsrq.srq_context);
			goto bail;
		}
	}
unlock:
	spin_unlock_irqrestore(&rq->lock, flags);
bail:
	return ret;
}

/**
 * ipath_ruc_loopback - handle UC and RC loopback requests
 * @sqp: the sending QP
 *
 * This is called from ipath_do_send() to
 * forward a WQE addressed to the same HCA.
 * Note that although we are single threaded due to the tasklet, we still
 * have to protect against post_send().  We don't have to worry about
 * receive interrupts since this is a connected protocol and all packets
 * will pass through here.
 */
static void ipath_ruc_loopback(struct ipath_qp *sqp)
{
	struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
	struct ipath_qp *qp;
	struct ipath_swqe *wqe;
	struct ipath_sge *sge;
	unsigned long flags;
	struct ib_wc wc;
	u64 sdata;
	atomic64_t *maddr;
	enum ib_wc_status send_status;

	/*
	 * Note that we check the responder QP state after
	 * checking the requester's state.
	 */
	qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);

	spin_lock_irqsave(&sqp->s_lock, flags);

	/* Return if we are already busy processing a work request. */
	if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
	    !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
		goto unlock;

	sqp->s_flags |= IPATH_S_BUSY;

again:
	if (sqp->s_last == sqp->s_head)
		goto clr_busy;
	wqe = get_swqe_ptr(sqp, sqp->s_last);

	/* Return if it is not OK to start a new work request. */
	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
		if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
			goto clr_busy;
		/* We are in the error state, flush the work request. */
		send_status = IB_WC_WR_FLUSH_ERR;
		goto flush_send;
	}

	/*
	 * We can rely on the entry not changing without the s_lock
	 * being held until we update s_last.
	 * We increment s_cur to indicate s_last is in progress.
	 */
	if (sqp->s_last == sqp->s_cur) {
		if (++sqp->s_cur >= sqp->s_size)
			sqp->s_cur = 0;
	}
	spin_unlock_irqrestore(&sqp->s_lock, flags);

	if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
		dev->n_pkt_drops++;
		/*
		 * For RC, the requester would timeout and retry so
		 * shortcut the timeouts and just signal too many retries.
		 */
		if (sqp->ibqp.qp_type == IB_QPT_RC)
			send_status = IB_WC_RETRY_EXC_ERR;
		else
			send_status = IB_WC_SUCCESS;
		goto serr;
	}

	memset(&wc, 0, sizeof wc);
	send_status = IB_WC_SUCCESS;

	sqp->s_sge.sge = wqe->sg_list[0];
	sqp->s_sge.sg_list = wqe->sg_list + 1;
	sqp->s_sge.num_sge = wqe->wr.num_sge;
	sqp->s_len = wqe->length;
	switch (wqe->wr.opcode) {
	case IB_WR_SEND_WITH_IMM:
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.imm_data = wqe->wr.ex.imm_data;
		/* FALLTHROUGH */
	case IB_WR_SEND:
		if (!ipath_get_rwqe(qp, 0))
			goto rnr_nak;
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.imm_data = wqe->wr.ex.imm_data;
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		/* FALLTHROUGH */
	case IB_WR_RDMA_WRITE:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		if (wqe->length == 0)
			break;
		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_WRITE)))
			goto acc_err;
		break;

	case IB_WR_RDMA_READ:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
			goto inv_err;
		if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
					    wqe->wr.wr.rdma.remote_addr,
					    wqe->wr.wr.rdma.rkey,
					    IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->wr.num_sge;
		break;

	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
			goto inv_err;
		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
					    wqe->wr.wr.atomic.remote_addr,
					    wqe->wr.wr.atomic.rkey,
					    IB_ACCESS_REMOTE_ATOMIC)))
			goto acc_err;
		/* Perform atomic OP and save result. */
		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
		sdata = wqe->wr.wr.atomic.compare_add;
		*(u64 *) sqp->s_sge.sge.vaddr =
			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
			(u64) atomic64_add_return(sdata, maddr) - sdata :
			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
				      sdata, wqe->wr.wr.atomic.swap);
		goto send_comp;

	default:
		send_status = IB_WC_LOC_QP_OP_ERR;
		goto serr;
	}

	sge = &sqp->s_sge.sge;
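	/*
	 * Copy the payload from the sender's SGE list into the receiver's
	 * SGE state, advancing through memory region segments as each SGE
	 * is consumed.
	 */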
	while (sqp->s_len) {
		u32 len = sqp->s_len;

		if (len > sge->length)
			len = sge->length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--sqp->s_sge.num_sge)
				*sge = *sqp->s_sge.sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		sqp->s_len -= len;
	}

	if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
		goto send_comp;

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
	else
		wc.opcode = IB_WC_RECV;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.byte_len = wqe->length;
	wc.qp = &qp->ibqp;
	wc.src_qp = qp->remote_qpn;
	wc.slid = qp->remote_ah_attr.dlid;
	wc.sl = qp->remote_ah_attr.sl;
	wc.port_num = 1;
	/* Signal completion event if the solicited bit is set. */
	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
		       wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
	spin_lock_irqsave(&sqp->s_lock, flags);
flush_send:
	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
	ipath_send_complete(sqp, wqe, send_status);
	goto again;

rnr_nak:
	/* Handle RNR NAK */
	if (qp->ibqp.qp_type == IB_QPT_UC)
		goto send_comp;
	/*
	 * Note: we don't need the s_lock held since the BUSY flag
	 * makes this single threaded.
	 */
	if (sqp->s_rnr_retry == 0) {
		send_status = IB_WC_RNR_RETRY_EXC_ERR;
		goto serr;
	}
	if (sqp->s_rnr_retry_cnt < 7)
		sqp->s_rnr_retry--;
	spin_lock_irqsave(&sqp->s_lock, flags);
	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
		goto clr_busy;
	sqp->s_flags |= IPATH_S_WAITING;
	dev->n_rnr_naks++;
	sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
	ipath_insert_rnr_queue(sqp);
	goto clr_busy;

inv_err:
	send_status = IB_WC_REM_INV_REQ_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

acc_err:
	send_status = IB_WC_REM_ACCESS_ERR;
	wc.status = IB_WC_LOC_PROT_ERR;
err:
	/* responder goes to error state */
	ipath_rc_error(qp, wc.status);

serr:
	spin_lock_irqsave(&sqp->s_lock, flags);
	ipath_send_complete(sqp, wqe, send_status);
	if (sqp->ibqp.qp_type == IB_QPT_RC) {
		int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);

		sqp->s_flags &= ~IPATH_S_BUSY;
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		if (lastwqe) {
			struct ib_event ev;

			ev.device = sqp->ibqp.device;
			ev.element.qp = &sqp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
		}
		goto done;
	}
clr_busy:
	sqp->s_flags &= ~IPATH_S_BUSY;
unlock:
	spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
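	/* Drop the reference taken by ipath_lookup_qpn() above. */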
	if (qp && atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
}

static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
{
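	/*
	 * Arm the "PIO buffer available" interrupt so that
	 * ipath_ib_piobufavail() is called when a send buffer frees up.
	 * This is only needed when the packet will go out via PIO, i.e.
	 * when the device has no send DMA or the QP is the SMI QP.
	 */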
	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
	    qp->ibqp.qp_type == IB_QPT_SMI) {
		unsigned long flags;

		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
		dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
				 dd->ipath_sendctrl);
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
	}
}

/**
 * ipath_no_bufs_available - tell the layer driver we need buffers
 * @qp: the QP that caused the problem
 * @dev: the device we ran out of buffers on
 *
 * Called when we run out of PIO buffers.
 * If we are now in the error state, return zero to flush the
 * send work request.
 */
static int ipath_no_bufs_available(struct ipath_qp *qp,
				    struct ipath_ibdev *dev)
{
	unsigned long flags;
	int ret = 1;

	/*
	 * Note that as soon as want_buffer() is called and
	 * possibly before it returns, ipath_ib_piobufavail()
	 * could be called. Therefore, put QP on the piowait list before
	 * enabling the PIO avail interrupt.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
		dev->n_piowait++;
		qp->s_flags |= IPATH_S_WAITING;
		qp->s_flags &= ~IPATH_S_BUSY;
		spin_lock(&dev->pending_lock);
		if (list_empty(&qp->piowait))
			list_add_tail(&qp->piowait, &dev->piowait);
		spin_unlock(&dev->pending_lock);
	} else
		ret = 0;
	spin_unlock_irqrestore(&qp->s_lock, flags);
	if (ret)
		want_buffer(dev->dd, qp);
	return ret;
}

/**
 * ipath_make_grh - construct a GRH header
 * @dev: a pointer to the ipath device
 * @hdr: a pointer to the GRH header being constructed
 * @grh: the global route address to send to
 * @hwords: the number of 32 bit words of header being sent
 * @nwords: the number of 32 bit words of data being sent
 *
 * Return the size of the header in 32 bit words.
 */
u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
		   struct ib_global_route *grh, u32 hwords, u32 nwords)
{
	hdr->version_tclass_flow =
		cpu_to_be32((6 << 28) |
			    (grh->traffic_class << 20) |
			    grh->flow_label);
	hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
	/* next_hdr is defined by C8-7 in ch. 8.4.1 */
	hdr->next_hdr = 0x1B;
	hdr->hop_limit = grh->hop_limit;
	/* The SGID is 32-bit aligned. */
	hdr->sgid.global.subnet_prefix = dev->gid_prefix;
	hdr->sgid.global.interface_id = dev->dd->ipath_guid;
	hdr->dgid = grh->dgid;

	/* GRH header size in 32-bit words. */
	return sizeof(struct ib_grh) / sizeof(u32);
}

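/**
 * ipath_make_ruc_header - construct the LRH (and GRH) and BTH for a packet
 * @dev: the device to send on
 * @qp: the QP whose header is being built
 * @ohdr: a pointer to the BTH being constructed
 * @bth0: the first BTH word; the P_Key and pad count are OR'ed in here
 * @bth2: the third BTH word (normally the PSN)
 */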
void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
			   struct ipath_other_headers *ohdr,
			   u32 bth0, u32 bth2)
{
	u16 lrh0;
	u32 nwords;
	u32 extra_bytes;

	/* Construct the header. */
	extra_bytes = -qp->s_cur_size & 3;
	nwords = (qp->s_cur_size + extra_bytes) >> 2;
	lrh0 = IPATH_LRH_BTH;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
						 &qp->remote_ah_attr.grh,
						 qp->s_hdrwords, nwords);
		lrh0 = IPATH_LRH_GRH;
	}
	lrh0 |= qp->remote_ah_attr.sl << 4;
	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
	qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
	bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
	bth0 |= extra_bytes << 20;
	ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(bth2);
}

/**
 * ipath_do_send - perform a send on a QP
 * @data: contains a pointer to the QP
 *
 * Process entries in the send work queue until credit or queue is
 * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
 * Otherwise, two threads could send packets out of order.
 */
void ipath_do_send(unsigned long data)
{
	struct ipath_qp *qp = (struct ipath_qp *)data;
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	int (*make_req)(struct ipath_qp *qp);
	unsigned long flags;

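	/*
	 * If this is a connected QP whose destination LID is our own port,
	 * the request never touches the wire; handle it entirely in
	 * software via the loopback path.
	 */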
	if ((qp->ibqp.qp_type == IB_QPT_RC ||
	     qp->ibqp.qp_type == IB_QPT_UC) &&
	    qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
		ipath_ruc_loopback(qp);
		goto bail;
	}

	if (qp->ibqp.qp_type == IB_QPT_RC)
		make_req = ipath_make_rc_req;
	else if (qp->ibqp.qp_type == IB_QPT_UC)
		make_req = ipath_make_uc_req;
	else
		make_req = ipath_make_ud_req;

	spin_lock_irqsave(&qp->s_lock, flags);

	/* Return if we are already busy processing a work request. */
	if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
	    !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		goto bail;
	}

	qp->s_flags |= IPATH_S_BUSY;

	spin_unlock_irqrestore(&qp->s_lock, flags);

again:
	/* Check for a constructed packet to be sent. */
	if (qp->s_hdrwords != 0) {
		/*
		 * If no PIO bufs are available, return.  An interrupt will
		 * call ipath_ib_piobufavail() when one is available.
		 */
		if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
				     qp->s_cur_sge, qp->s_cur_size)) {
			if (ipath_no_bufs_available(qp, dev))
				goto bail;
		}
		dev->n_unicast_xmit++;
		/* Record that we sent the packet and s_hdr is empty. */
		qp->s_hdrwords = 0;
	}

	if (make_req(qp))
		goto again;

bail:;
}

/*
 * This should be called with s_lock held.
 */
void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
			 enum ib_wc_status status)
{
	u32 old_last, last;

	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
		return;

	/* See ch. 11.2.4.1 and 10.7.3.1 */
	if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
	    status != IB_WC_SUCCESS) {
		struct ib_wc wc;

		memset(&wc, 0, sizeof wc);
		wc.wr_id = wqe->wr.wr_id;
		wc.status = status;
		wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		wc.qp = &qp->ibqp;
		if (status == IB_WC_SUCCESS)
			wc.byte_len = wqe->length;
		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
			       status != IB_WC_SUCCESS);
	}

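	/*
	 * Advance s_last past the completed WQE, and move s_cur and s_tail
	 * along with it if they referenced the same entry.
	 */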
	old_last = last = qp->s_last;
	if (++last >= qp->s_size)
		last = 0;
	qp->s_last = last;
	if (qp->s_cur == old_last)
		qp->s_cur = last;
	if (qp->s_tail == old_last)
		qp->s_tail = last;
	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
		qp->s_draining = 0;
}