/*
 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT

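/* Returns the number of pad bytes needed to round @len up to the next
 * XDR quad (4-byte) boundary.
 */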
static u32 xdr_padsize(u32 len)
{
	return (len & 3) ? (4 - (len & 3)) : 0;
}

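/* Build a vector of kvecs covering the head, page list, and tail of the
 * RPC reply in @xdr. sge[0] is reserved for the RPC-over-RDMA transport
 * header, so the mapping starts at sge[1]. On success the number of
 * entries is recorded in vec->count.
 */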
int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
		     struct xdr_buf *xdr,
		     struct svc_rdma_req_map *vec)
{
	int sge_no;
	u32 sge_bytes;
	u32 page_bytes;
	u32 page_off;
	int page_no;

	if (xdr->len !=
	    (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
		pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
		return -EIO;
	}

	/* Skip the first sge, this is for the RPCRDMA header */
	sge_no = 1;

	/* Head SGE */
	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
	sge_no++;

	/* pages SGE */
	page_no = 0;
	page_bytes = xdr->page_len;
	page_off = xdr->page_base;
	while (page_bytes) {
		vec->sge[sge_no].iov_base =
			page_address(xdr->pages[page_no]) + page_off;
		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
		page_bytes -= sge_bytes;
		vec->sge[sge_no].iov_len = sge_bytes;

		sge_no++;
		page_no++;
		page_off = 0; /* reset for next time through loop */
	}

	/* Tail SGE */
	if (xdr->tail[0].iov_len) {
		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
		vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
		sge_no++;
	}

	dprintk("svcrdma: %s: sge_no %d page_no %d "
		"page_base %u page_len %u head_len %zu tail_len %zu\n",
		__func__, sge_no, page_no, xdr->page_base, xdr->page_len,
		xdr->head[0].iov_len, xdr->tail[0].iov_len);

	vec->count = sge_no;
	return 0;
}

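/* DMA-map the portion of the xdr_buf (head, page list, or tail) that
 * contains byte offset @xdr_off, mapping at most one page
 * (min(PAGE_SIZE, len) bytes) for the given DMA direction.
 */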
static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
			      struct xdr_buf *xdr,
			      u32 xdr_off, size_t len, int dir)
{
	struct page *page;
	dma_addr_t dma_addr;
	if (xdr_off < xdr->head[0].iov_len) {
		/* This offset is in the head */
		xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
		page = virt_to_page(xdr->head[0].iov_base);
	} else {
		xdr_off -= xdr->head[0].iov_len;
		if (xdr_off < xdr->page_len) {
			/* This offset is in the page list */
			xdr_off += xdr->page_base;
			page = xdr->pages[xdr_off >> PAGE_SHIFT];
			xdr_off &= ~PAGE_MASK;
		} else {
			/* This offset is in the tail */
			xdr_off -= xdr->page_len;
			xdr_off += (unsigned long)
				xdr->tail[0].iov_base & ~PAGE_MASK;
			page = virt_to_page(xdr->tail[0].iov_base);
		}
	}
	dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
				   min_t(size_t, PAGE_SIZE, len), dir);
	return dma_addr;
}

/* Returns the address of the first read chunk or NULL if no read chunk
 * is present
 */
struct rpcrdma_read_chunk *
svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
{
	struct rpcrdma_read_chunk *ch =
		(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];

	if (ch->rc_discrim == xdr_zero)
		return NULL;
	return ch;
}

/* Returns the address of the first write array element or NULL
 * if no write array is present
 */
static struct rpcrdma_write_array *
svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
{
	if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
	    rmsgp->rm_body.rm_chunks[1] == xdr_zero)
		return NULL;
	return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
}

/* Returns the address of the first reply array element or NULL if no
 * reply array is present
 */
static struct rpcrdma_write_array *
svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
			 struct rpcrdma_write_array *wr_ary)
{
	struct rpcrdma_read_chunk *rch;
	struct rpcrdma_write_array *rp_ary;

	/* XXX: Need to fix when reply chunk may occur with read list
	 *	and/or write list.
	 */
	if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
	    rmsgp->rm_body.rm_chunks[1] != xdr_zero)
		return NULL;

	rch = svc_rdma_get_read_chunk(rmsgp);
	if (rch) {
		while (rch->rc_discrim != xdr_zero)
			rch++;

		/* The reply chunk follows an empty write array located
		 * at 'rc_position' here. The reply array is at rc_target.
		 */
		rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
		goto found_it;
	}

	if (wr_ary) {
		int chunk = be32_to_cpu(wr_ary->wc_nchunks);

		rp_ary = (struct rpcrdma_write_array *)
			 &wr_ary->wc_array[chunk].wc_target.rs_length;
		goto found_it;
	}

	/* No read list, no write list */
	rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2];

 found_it:
	if (rp_ary->wc_discrim == xdr_zero)
		return NULL;
	return rp_ary;
}

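/* Post a single RDMA Write WR that pushes up to @write_len bytes of the
 * RPC reply in rq_res, starting at @xdr_off, into the client memory
 * region identified by @rmr/@to. Returns the number of bytes actually
 * posted, or -EIO on a fatal error.
 */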
/* Assumptions:
 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
 */
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
		      u32 rmr, u64 to,
		      u32 xdr_off, int write_len,
		      struct svc_rdma_req_map *vec)
{
	struct ib_rdma_wr write_wr;
	struct ib_sge *sge;
	int xdr_sge_no;
	int sge_no;
	int sge_bytes;
	int sge_off;
	int bc;
	struct svc_rdma_op_ctxt *ctxt;

	if (vec->count > RPCSVC_MAXPAGES) {
		pr_err("svcrdma: Too many pages (%lu)\n", vec->count);
		return -EIO;
	}

	dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
		"write_len=%d, vec->sge=%p, vec->count=%lu\n",
		rmr, (unsigned long long)to, xdr_off,
		write_len, vec->sge, vec->count);

	ctxt = svc_rdma_get_context(xprt);
	ctxt->direction = DMA_TO_DEVICE;
	sge = ctxt->sge;

	/* Find the SGE associated with xdr_off */
	for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
	     xdr_sge_no++) {
		if (vec->sge[xdr_sge_no].iov_len > bc)
			break;
		bc -= vec->sge[xdr_sge_no].iov_len;
	}

	sge_off = bc;
	bc = write_len;
	sge_no = 0;

	/* Copy the remaining SGE */
	while (bc != 0) {
		sge_bytes = min_t(size_t,
			  bc, vec->sge[xdr_sge_no].iov_len-sge_off);
		sge[sge_no].length = sge_bytes;
		sge[sge_no].addr =
			dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
				    sge_bytes, DMA_TO_DEVICE);
		xdr_off += sge_bytes;
		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
					 sge[sge_no].addr))
			goto err;
		atomic_inc(&xprt->sc_dma_used);
		sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
		ctxt->count++;
		sge_off = 0;
		sge_no++;
		xdr_sge_no++;
		if (xdr_sge_no > vec->count) {
			pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no);
			goto err;
		}
		bc -= sge_bytes;
		if (sge_no == xprt->sc_max_sge)
			break;
	}

	/* Prepare WRITE WR */
	memset(&write_wr, 0, sizeof write_wr);
	ctxt->wr_op = IB_WR_RDMA_WRITE;
	write_wr.wr.wr_id = (unsigned long)ctxt;
	write_wr.wr.sg_list = &sge[0];
	write_wr.wr.num_sge = sge_no;
	write_wr.wr.opcode = IB_WR_RDMA_WRITE;
	write_wr.wr.send_flags = IB_SEND_SIGNALED;
	write_wr.rkey = rmr;
	write_wr.remote_addr = to;

	/* Post It */
	atomic_inc(&rdma_stat_write);
	if (svc_rdma_send(xprt, &write_wr.wr))
		goto err;
	return write_len - bc;
 err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
	/* Fatal error, close transport */
	return -EIO;
}

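/* Send the pagelist portion of the RPC reply into the client-provided
 * write chunk list and encode the resulting write list in the reply's
 * transport header. Returns the number of payload bytes consumed
 * (rq_res.page_len), or -EIO on error.
 */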
noinline
static int send_write_chunks(struct svcxprt_rdma *xprt,
			     struct rpcrdma_write_array *wr_ary,
			     struct rpcrdma_msg *rdma_resp,
			     struct svc_rqst *rqstp,
			     struct svc_rdma_req_map *vec)
{
	u32 xfer_len = rqstp->rq_res.page_len;
	int write_len;
	u32 xdr_off;
	int chunk_off;
	int chunk_no;
	int nchunks;
	struct rpcrdma_write_array *res_ary;
	int ret;

	res_ary = (struct rpcrdma_write_array *)
		&rdma_resp->rm_body.rm_chunks[1];

	/* Write chunks start at the pagelist */
	nchunks = be32_to_cpu(wr_ary->wc_nchunks);
	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
	     xfer_len && chunk_no < nchunks;
	     chunk_no++) {
		struct rpcrdma_segment *arg_ch;
		u64 rs_offset;

		arg_ch = &wr_ary->wc_array[chunk_no].wc_target;
		write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));

		/* Prepare the response chunk given the length actually
		 * written */
		xdr_decode_hyper((__be32 *)&arg_ch->rs_offset, &rs_offset);
		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
						arg_ch->rs_handle,
						arg_ch->rs_offset,
						write_len);
		chunk_off = 0;
		while (write_len) {
			ret = send_write(xprt, rqstp,
					 be32_to_cpu(arg_ch->rs_handle),
					 rs_offset + chunk_off,
					 xdr_off,
					 write_len,
					 vec);
			if (ret <= 0)
				goto out_err;
			chunk_off += ret;
			xdr_off += ret;
			xfer_len -= ret;
			write_len -= ret;
		}
	}
	/* Update the req with the number of chunks actually used */
	svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);

	return rqstp->rq_res.page_len;

out_err:
	pr_err("svcrdma: failed to send write chunks, rc=%d\n", ret);
	return -EIO;
}

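/* Send the entire RPC reply (rq_res.len bytes, starting at the head)
 * into the client-provided reply chunk and encode the resulting reply
 * array in the reply's transport header. Returns rq_res.len, or -EIO
 * on error.
 */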
noinline
static int send_reply_chunks(struct svcxprt_rdma *xprt,
			     struct rpcrdma_write_array *rp_ary,
			     struct rpcrdma_msg *rdma_resp,
			     struct svc_rqst *rqstp,
			     struct svc_rdma_req_map *vec)
{
	u32 xfer_len = rqstp->rq_res.len;
	int write_len;
	u32 xdr_off;
	int chunk_no;
	int chunk_off;
	int nchunks;
	struct rpcrdma_segment *ch;
	struct rpcrdma_write_array *res_ary;
	int ret;

	/* XXX: need to fix when reply lists occur with read-list and or
	 * write-list */
	res_ary = (struct rpcrdma_write_array *)
		&rdma_resp->rm_body.rm_chunks[2];

	/* xdr offset starts at RPC message */
	nchunks = be32_to_cpu(rp_ary->wc_nchunks);
	for (xdr_off = 0, chunk_no = 0;
	     xfer_len && chunk_no < nchunks;
	     chunk_no++) {
		u64 rs_offset;
		ch = &rp_ary->wc_array[chunk_no].wc_target;
		write_len = min(xfer_len, be32_to_cpu(ch->rs_length));

		/* Prepare the reply chunk given the length actually
		 * written */
		xdr_decode_hyper((__be32 *)&ch->rs_offset, &rs_offset);
		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
						ch->rs_handle, ch->rs_offset,
						write_len);
		chunk_off = 0;
		while (write_len) {
			ret = send_write(xprt, rqstp,
					 be32_to_cpu(ch->rs_handle),
					 rs_offset + chunk_off,
					 xdr_off,
					 write_len,
					 vec);
			if (ret <= 0)
				goto out_err;
			chunk_off += ret;
			xdr_off += ret;
			xfer_len -= ret;
			write_len -= ret;
		}
	}
	/* Update the req with the number of chunks actually used */
	svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);

	return rqstp->rq_res.len;

out_err:
	pr_err("svcrdma: failed to send reply chunks, rc=%d\n", ret);
	return -EIO;
}

/* This function prepares the portion of the RPCRDMA message to be
 * sent in the RDMA_SEND. This function is called after data sent via
 * RDMA has already been transmitted. There are three cases:
 * - The RPCRDMA header, RPC header, and payload are all sent in a
 *   single RDMA_SEND. This is the "inline" case.
 * - The RPCRDMA header and some portion of the RPC header and data
 *   are sent via this RDMA_SEND and another portion of the data is
 *   sent via RDMA.
 * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
 *   header and data are all transmitted via RDMA.
 * In all three cases, this function prepares the RPCRDMA header in
 * sge[0], the 'type' parameter indicates the type to place in the
 * RPCRDMA header, and the 'byte_count' field indicates how much of
 * the XDR to include in this RDMA_SEND. NB: The offset of the payload
 * to send is zero in the XDR.
 */
static int send_reply(struct svcxprt_rdma *rdma,
		      struct svc_rqst *rqstp,
		      struct page *page,
		      struct rpcrdma_msg *rdma_resp,
		      struct svc_rdma_op_ctxt *ctxt,
		      struct svc_rdma_req_map *vec,
		      int byte_count)
{
	struct ib_send_wr send_wr;
	u32 xdr_off;
	int sge_no;
	int sge_bytes;
	int page_no;
	int pages;
	int ret;

	/* Post a recv buffer to handle another request. */
	ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
	if (ret) {
		printk(KERN_INFO
		       "svcrdma: could not post a receive buffer, err=%d."
		       "Closing transport %p.\n", ret, rdma);
		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
		svc_rdma_put_context(ctxt, 0);
		return -ENOTCONN;
	}

	/* Prepare the context */
	ctxt->pages[0] = page;
	ctxt->count = 1;

	/* Prepare the SGE for the RPCRDMA Header */
	ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
	ctxt->sge[0].addr =
	    ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
			    ctxt->sge[0].length, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
		goto err;
	atomic_inc(&rdma->sc_dma_used);

	ctxt->direction = DMA_TO_DEVICE;

	/* Map the payload indicated by 'byte_count' */
	xdr_off = 0;
	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
		byte_count -= sge_bytes;
		ctxt->sge[sge_no].addr =
			dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
				    sge_bytes, DMA_TO_DEVICE);
		xdr_off += sge_bytes;
		if (ib_dma_mapping_error(rdma->sc_cm_id->device,
					 ctxt->sge[sge_no].addr))
			goto err;
		atomic_inc(&rdma->sc_dma_used);
		ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
		ctxt->sge[sge_no].length = sge_bytes;
	}
	if (byte_count != 0) {
		pr_err("svcrdma: Could not map %d bytes\n", byte_count);
		goto err;
	}

	/* Save all respages in the ctxt and remove them from the
	 * respages array. They are our pages until the I/O
	 * completes.
	 */
	pages = rqstp->rq_next_page - rqstp->rq_respages;
	for (page_no = 0; page_no < pages; page_no++) {
		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
		ctxt->count++;
		rqstp->rq_respages[page_no] = NULL;
		/*
		 * If there are more pages than SGE, terminate SGE
		 * list so that svc_rdma_unmap_dma doesn't attempt to
		 * unmap garbage.
		 */
		if (page_no+1 >= sge_no)
			ctxt->sge[page_no+1].length = 0;
	}
	rqstp->rq_next_page = rqstp->rq_respages + 1;

	/* The loop above bumps sc_dma_used for each sge. The
	 * xdr_buf.tail gets a separate sge, but resides in the
	 * same page as xdr_buf.head. Don't count it twice.
	 */
	if (sge_no > ctxt->count)
		atomic_dec(&rdma->sc_dma_used);

	if (sge_no > rdma->sc_max_sge) {
		pr_err("svcrdma: Too many sges (%d)\n", sge_no);
		goto err;
	}
	memset(&send_wr, 0, sizeof send_wr);
	ctxt->wr_op = IB_WR_SEND;
	send_wr.wr_id = (unsigned long)ctxt;
	send_wr.sg_list = ctxt->sge;
	send_wr.num_sge = sge_no;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags =  IB_SEND_SIGNALED;

	ret = svc_rdma_send(rdma, &send_wr);
	if (ret)
		goto err;

	return 0;

 err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);
	return -EIO;
}

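/* RPC-over-RDMA replies carry no record marker, so there is nothing to
 * reserve in the reply's xdr_buf head; this transport method is
 * intentionally a no-op.
 */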
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}

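/* Transport sendto entry point: map the reply in rq_res, push
 * write-chunk and reply-chunk payloads to the client via RDMA Write,
 * then post an RDMA Send carrying the transport header and any
 * remaining inline bytes.
 */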
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);
	struct rpcrdma_msg *rdma_argp;
	struct rpcrdma_msg *rdma_resp;
	struct rpcrdma_write_array *wr_ary, *rp_ary;
	enum rpcrdma_proc reply_type;
	int ret;
	int inline_bytes;
	struct page *res_page;
	struct svc_rdma_op_ctxt *ctxt;
	struct svc_rdma_req_map *vec;

	dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);

	/* Get the RDMA request header. The receive logic always
	 * places this at the start of page 0.
	 */
	rdma_argp = page_address(rqstp->rq_pages[0]);
	wr_ary = svc_rdma_get_write_array(rdma_argp);
	rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);

	/* Build a req vec for the XDR */
	ctxt = svc_rdma_get_context(rdma);
	ctxt->direction = DMA_TO_DEVICE;
	vec = svc_rdma_get_req_map(rdma);
	ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec);
	if (ret)
		goto err0;
	inline_bytes = rqstp->rq_res.len;

	/* Create the RDMA response header */
	ret = -ENOMEM;
	res_page = alloc_page(GFP_KERNEL);
	if (!res_page)
		goto err0;
	rdma_resp = page_address(res_page);
	if (rp_ary)
		reply_type = RDMA_NOMSG;
	else
		reply_type = RDMA_MSG;
	svc_rdma_xdr_encode_reply_header(rdma, rdma_argp,
					 rdma_resp, reply_type);

	/* Send any write-chunk data and build resp write-list */
	if (wr_ary) {
		ret = send_write_chunks(rdma, wr_ary, rdma_resp, rqstp, vec);
		if (ret < 0)
			goto err1;
		inline_bytes -= ret + xdr_padsize(ret);
	}

	/* Send any reply-list data and update resp reply-list */
	if (rp_ary) {
		ret = send_reply_chunks(rdma, rp_ary, rdma_resp, rqstp, vec);
		if (ret < 0)
			goto err1;
		inline_bytes -= ret;
	}

	ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
			 inline_bytes);
	svc_rdma_put_req_map(rdma, vec);
	dprintk("svcrdma: send_reply returns %d\n", ret);
	return ret;

 err1:
	put_page(res_page);
 err0:
	svc_rdma_put_req_map(rdma, vec);
	svc_rdma_put_context(ctxt, 0);
	return ret;
}