/*
 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT

53 54 55
static int map_xdr(struct svcxprt_rdma *xprt,
		   struct xdr_buf *xdr,
		   struct svc_rdma_req_map *vec)
T
Tom Tucker 已提交
56 57 58 59
{
	int sge_no;
	u32 sge_bytes;
	u32 page_bytes;
60
	u32 page_off;
T
Tom Tucker 已提交
61 62
	int page_no;

63 64 65 66 67
	if (xdr->len !=
	    (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
		pr_err("svcrdma: map_xdr: XDR buffer length error\n");
		return -EIO;
	}
68

T
Tom Tucker 已提交
69 70 71 72
	/* Skip the first sge, this is for the RPCRDMA header */
	sge_no = 1;

	/* Head SGE */
73 74
	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
T
Tom Tucker 已提交
75 76 77 78 79 80
	sge_no++;

	/* pages SGE */
	page_no = 0;
	page_bytes = xdr->page_len;
	page_off = xdr->page_base;
81 82 83 84
	while (page_bytes) {
		vec->sge[sge_no].iov_base =
			page_address(xdr->pages[page_no]) + page_off;
		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
T
Tom Tucker 已提交
85
		page_bytes -= sge_bytes;
86
		vec->sge[sge_no].iov_len = sge_bytes;
T
Tom Tucker 已提交
87 88 89 90 91 92 93

		sge_no++;
		page_no++;
		page_off = 0; /* reset for next time through loop */
	}

	/* Tail SGE */
94 95 96
	if (xdr->tail[0].iov_len) {
		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
		vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
T
Tom Tucker 已提交
97 98 99
		sge_no++;
	}

100
	dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
101
		"page_base %u page_len %u head_len %zu tail_len %zu\n",
102 103 104
		sge_no, page_no, xdr->page_base, xdr->page_len,
		xdr->head[0].iov_len, xdr->tail[0].iov_len);

105
	vec->count = sge_no;
106
	return 0;
T
Tom Tucker 已提交
107 108
}

109 110 111 112 113 114 115 116 117 118 119 120 121 122
static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
			      struct xdr_buf *xdr,
			      u32 xdr_off, size_t len, int dir)
{
	struct page *page;
	dma_addr_t dma_addr;
	if (xdr_off < xdr->head[0].iov_len) {
		/* This offset is in the head */
		xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
		page = virt_to_page(xdr->head[0].iov_base);
	} else {
		xdr_off -= xdr->head[0].iov_len;
		if (xdr_off < xdr->page_len) {
			/* This offset is in the page list */
123
			xdr_off += xdr->page_base;
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
			page = xdr->pages[xdr_off >> PAGE_SHIFT];
			xdr_off &= ~PAGE_MASK;
		} else {
			/* This offset is in the tail */
			xdr_off -= xdr->page_len;
			xdr_off += (unsigned long)
				xdr->tail[0].iov_base & ~PAGE_MASK;
			page = virt_to_page(xdr->tail[0].iov_base);
		}
	}
	dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
				   min_t(size_t, PAGE_SIZE, len), dir);
	return dma_addr;
}

139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
/* Returns the address of the first read chunk or <nul> if no read chunk
 * is present
 */
struct rpcrdma_read_chunk *
svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
{
	struct rpcrdma_read_chunk *ch =
		(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];

	if (ch->rc_discrim == xdr_zero)
		return NULL;
	return ch;
}

/* Locate the write list in an RPCRDMA header.
 *
 * A write array is reported only when the read-list slot (rm_chunks[0])
 * is xdr_zero and the following word (rm_chunks[1]) is not. Returns a
 * pointer to that array, or NULL when either condition fails.
 */
static struct rpcrdma_write_array *
svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
{
	if (rmsgp->rm_body.rm_chunks[0] == xdr_zero &&
	    rmsgp->rm_body.rm_chunks[1] != xdr_zero)
		return (struct rpcrdma_write_array *)
			&rmsgp->rm_body.rm_chunks[1];

	return NULL;
}

/* Returns the address of the reply array, or NULL if no reply array is
 * present.
 *
 * Only headers with neither a read list nor a write list are handled:
 * both rm_chunks[0] and rm_chunks[1] must be xdr_zero, in which case the
 * reply array is looked up at rm_chunks[2]. Any other layout yields NULL.
 */
static struct rpcrdma_write_array *
svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp)
{
	struct rpcrdma_write_array *rp_ary;

	/* XXX: Need to fix when reply chunk may occur with read list
	 *	and/or write list. Until then there is nothing to walk:
	 *	with both lists required to be empty, the reply array can
	 *	only sit directly after the two empty-list markers.
	 */
	if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
	    rmsgp->rm_body.rm_chunks[1] != xdr_zero)
		return NULL;

	/* No read list, no write list */
	rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2];
	if (rp_ary->wc_discrim == xdr_zero)
		return NULL;
	return rp_ary;
}

T
Tom Tucker 已提交
212 213 214 215 216 217
/* Assumptions:
 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
 */
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
		      u32 rmr, u64 to,
		      u32 xdr_off, int write_len,
218
		      struct svc_rdma_req_map *vec)
T
Tom Tucker 已提交
219
{
C
Christoph Hellwig 已提交
220
	struct ib_rdma_wr write_wr;
T
Tom Tucker 已提交
221 222 223 224 225 226 227 228
	struct ib_sge *sge;
	int xdr_sge_no;
	int sge_no;
	int sge_bytes;
	int sge_off;
	int bc;
	struct svc_rdma_op_ctxt *ctxt;

229 230 231 232 233
	if (vec->count > RPCSVC_MAXPAGES) {
		pr_err("svcrdma: Too many pages (%lu)\n", vec->count);
		return -EIO;
	}

T
Tom Tucker 已提交
234
	dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
235
		"write_len=%d, vec->sge=%p, vec->count=%lu\n",
236
		rmr, (unsigned long long)to, xdr_off,
237
		write_len, vec->sge, vec->count);
T
Tom Tucker 已提交
238 239

	ctxt = svc_rdma_get_context(xprt);
240 241
	ctxt->direction = DMA_TO_DEVICE;
	sge = ctxt->sge;
T
Tom Tucker 已提交
242 243

	/* Find the SGE associated with xdr_off */
244
	for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
T
Tom Tucker 已提交
245
	     xdr_sge_no++) {
246
		if (vec->sge[xdr_sge_no].iov_len > bc)
T
Tom Tucker 已提交
247
			break;
248
		bc -= vec->sge[xdr_sge_no].iov_len;
T
Tom Tucker 已提交
249 250 251 252 253 254 255
	}

	sge_off = bc;
	bc = write_len;
	sge_no = 0;

	/* Copy the remaining SGE */
256 257 258
	while (bc != 0) {
		sge_bytes = min_t(size_t,
			  bc, vec->sge[xdr_sge_no].iov_len-sge_off);
T
Tom Tucker 已提交
259
		sge[sge_no].length = sge_bytes;
S
Steve Wise 已提交
260 261 262 263 264 265 266 267 268
		sge[sge_no].addr =
			dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
				    sge_bytes, DMA_TO_DEVICE);
		xdr_off += sge_bytes;
		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
					 sge[sge_no].addr))
			goto err;
		atomic_inc(&xprt->sc_dma_used);
		sge[sge_no].lkey = xprt->sc_dma_lkey;
269
		ctxt->count++;
T
Tom Tucker 已提交
270 271 272
		sge_off = 0;
		sge_no++;
		xdr_sge_no++;
273 274 275 276
		if (xdr_sge_no > vec->count) {
			pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no);
			goto err;
		}
T
Tom Tucker 已提交
277
		bc -= sge_bytes;
278 279
		if (sge_no == xprt->sc_max_sge)
			break;
T
Tom Tucker 已提交
280 281 282 283 284
	}

	/* Prepare WRITE WR */
	memset(&write_wr, 0, sizeof write_wr);
	ctxt->wr_op = IB_WR_RDMA_WRITE;
C
Christoph Hellwig 已提交
285 286 287 288 289 290 291
	write_wr.wr.wr_id = (unsigned long)ctxt;
	write_wr.wr.sg_list = &sge[0];
	write_wr.wr.num_sge = sge_no;
	write_wr.wr.opcode = IB_WR_RDMA_WRITE;
	write_wr.wr.send_flags = IB_SEND_SIGNALED;
	write_wr.rkey = rmr;
	write_wr.remote_addr = to;
T
Tom Tucker 已提交
292 293 294

	/* Post It */
	atomic_inc(&rdma_stat_write);
C
Christoph Hellwig 已提交
295
	if (svc_rdma_send(xprt, &write_wr.wr))
296
		goto err;
297
	return write_len - bc;
298
 err:
299
	svc_rdma_unmap_dma(ctxt);
300 301 302
	svc_rdma_put_context(ctxt, 0);
	/* Fatal error, close transport */
	return -EIO;
T
Tom Tucker 已提交
303 304 305 306 307 308
}

static int send_write_chunks(struct svcxprt_rdma *xprt,
			     struct rpcrdma_msg *rdma_argp,
			     struct rpcrdma_msg *rdma_resp,
			     struct svc_rqst *rqstp,
309
			     struct svc_rdma_req_map *vec)
T
Tom Tucker 已提交
310 311 312 313 314 315
{
	u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
	int write_len;
	u32 xdr_off;
	int chunk_off;
	int chunk_no;
316
	int nchunks;
T
Tom Tucker 已提交
317 318 319 320 321 322 323 324 325 326 327
	struct rpcrdma_write_array *arg_ary;
	struct rpcrdma_write_array *res_ary;
	int ret;

	arg_ary = svc_rdma_get_write_array(rdma_argp);
	if (!arg_ary)
		return 0;
	res_ary = (struct rpcrdma_write_array *)
		&rdma_resp->rm_body.rm_chunks[1];

	/* Write chunks start at the pagelist */
328
	nchunks = be32_to_cpu(arg_ary->wc_nchunks);
T
Tom Tucker 已提交
329
	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
330
	     xfer_len && chunk_no < nchunks;
T
Tom Tucker 已提交
331 332 333 334 335
	     chunk_no++) {
		struct rpcrdma_segment *arg_ch;
		u64 rs_offset;

		arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
336
		write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
T
Tom Tucker 已提交
337 338 339

		/* Prepare the response chunk given the length actually
		 * written */
340
		xdr_decode_hyper((__be32 *)&arg_ch->rs_offset, &rs_offset);
T
Tom Tucker 已提交
341
		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
342 343 344
						arg_ch->rs_handle,
						arg_ch->rs_offset,
						write_len);
T
Tom Tucker 已提交
345 346 347
		chunk_off = 0;
		while (write_len) {
			ret = send_write(xprt, rqstp,
348
					 be32_to_cpu(arg_ch->rs_handle),
T
Tom Tucker 已提交
349 350
					 rs_offset + chunk_off,
					 xdr_off,
351
					 write_len,
352
					 vec);
353
			if (ret <= 0) {
T
Tom Tucker 已提交
354 355 356 357
				dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
					ret);
				return -EIO;
			}
358 359 360 361
			chunk_off += ret;
			xdr_off += ret;
			xfer_len -= ret;
			write_len -= ret;
T
Tom Tucker 已提交
362 363 364 365 366 367 368 369 370 371 372 373
		}
	}
	/* Update the req with the number of chunks actually used */
	svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);

	return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
}

static int send_reply_chunks(struct svcxprt_rdma *xprt,
			     struct rpcrdma_msg *rdma_argp,
			     struct rpcrdma_msg *rdma_resp,
			     struct svc_rqst *rqstp,
374
			     struct svc_rdma_req_map *vec)
T
Tom Tucker 已提交
375 376 377 378 379 380
{
	u32 xfer_len = rqstp->rq_res.len;
	int write_len;
	u32 xdr_off;
	int chunk_no;
	int chunk_off;
381
	int nchunks;
T
Tom Tucker 已提交
382 383 384 385 386 387 388 389 390 391 392 393 394 395
	struct rpcrdma_segment *ch;
	struct rpcrdma_write_array *arg_ary;
	struct rpcrdma_write_array *res_ary;
	int ret;

	arg_ary = svc_rdma_get_reply_array(rdma_argp);
	if (!arg_ary)
		return 0;
	/* XXX: need to fix when reply lists occur with read-list and or
	 * write-list */
	res_ary = (struct rpcrdma_write_array *)
		&rdma_resp->rm_body.rm_chunks[2];

	/* xdr offset starts at RPC message */
396
	nchunks = be32_to_cpu(arg_ary->wc_nchunks);
T
Tom Tucker 已提交
397
	for (xdr_off = 0, chunk_no = 0;
398
	     xfer_len && chunk_no < nchunks;
T
Tom Tucker 已提交
399 400 401
	     chunk_no++) {
		u64 rs_offset;
		ch = &arg_ary->wc_array[chunk_no].wc_target;
402
		write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
T
Tom Tucker 已提交
403 404 405

		/* Prepare the reply chunk given the length actually
		 * written */
406
		xdr_decode_hyper((__be32 *)&ch->rs_offset, &rs_offset);
T
Tom Tucker 已提交
407
		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
408 409
						ch->rs_handle, ch->rs_offset,
						write_len);
T
Tom Tucker 已提交
410 411 412
		chunk_off = 0;
		while (write_len) {
			ret = send_write(xprt, rqstp,
413
					 be32_to_cpu(ch->rs_handle),
T
Tom Tucker 已提交
414 415
					 rs_offset + chunk_off,
					 xdr_off,
416
					 write_len,
417
					 vec);
418
			if (ret <= 0) {
T
Tom Tucker 已提交
419 420 421 422
				dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
					ret);
				return -EIO;
			}
423 424 425 426
			chunk_off += ret;
			xdr_off += ret;
			xfer_len -= ret;
			write_len -= ret;
T
Tom Tucker 已提交
427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
		}
	}
	/* Update the req with the number of chunks actually used */
	svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);

	return rqstp->rq_res.len;
}

/* This function prepares the portion of the RPCRDMA message to be
 * sent in the RDMA_SEND. This function is called after data sent via
 * RDMA has already been transmitted. There are three cases:
 * - The RPCRDMA header, RPC header, and payload are all sent in a
 *   single RDMA_SEND. This is the "inline" case.
 * - The RPCRDMA header and some portion of the RPC header and data
 *   are sent via this RDMA_SEND and another portion of the data is
 *   sent via RDMA.
 * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
 *   header and data are all transmitted via RDMA.
 * In all three cases, this function prepares the RPCRDMA header in
 * sge[0], the 'type' parameter indicates the type to place in the
 * RPCRDMA header, and the 'byte_count' field indicates how much of
448 449
 * the XDR to include in this RDMA_SEND. NB: The offset of the payload
 * to send is zero in the XDR.
T
Tom Tucker 已提交
450 451 452 453 454 455
 */
static int send_reply(struct svcxprt_rdma *rdma,
		      struct svc_rqst *rqstp,
		      struct page *page,
		      struct rpcrdma_msg *rdma_resp,
		      struct svc_rdma_op_ctxt *ctxt,
456
		      struct svc_rdma_req_map *vec,
T
Tom Tucker 已提交
457 458 459
		      int byte_count)
{
	struct ib_send_wr send_wr;
460
	u32 xdr_off;
T
Tom Tucker 已提交
461 462 463
	int sge_no;
	int sge_bytes;
	int page_no;
464
	int pages;
T
Tom Tucker 已提交
465 466
	int ret;

467 468 469 470 471 472 473
	/* Post a recv buffer to handle another request. */
	ret = svc_rdma_post_recv(rdma);
	if (ret) {
		printk(KERN_INFO
		       "svcrdma: could not post a receive buffer, err=%d."
		       "Closing transport %p.\n", ret, rdma);
		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
474 475
		svc_rdma_put_context(ctxt, 0);
		return -ENOTCONN;
476 477
	}

T
Tom Tucker 已提交
478 479 480 481 482
	/* Prepare the context */
	ctxt->pages[0] = page;
	ctxt->count = 1;

	/* Prepare the SGE for the RPCRDMA Header */
483 484
	ctxt->sge[0].lkey = rdma->sc_dma_lkey;
	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
T
Tom Tucker 已提交
485
	ctxt->sge[0].addr =
486 487
	    ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
			    ctxt->sge[0].length, DMA_TO_DEVICE);
488 489 490 491
	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
		goto err;
	atomic_inc(&rdma->sc_dma_used);

T
Tom Tucker 已提交
492
	ctxt->direction = DMA_TO_DEVICE;
493

494
	/* Map the payload indicated by 'byte_count' */
495
	xdr_off = 0;
496 497
	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
T
Tom Tucker 已提交
498
		byte_count -= sge_bytes;
S
Steve Wise 已提交
499 500 501 502 503 504 505 506 507
		ctxt->sge[sge_no].addr =
			dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
				    sge_bytes, DMA_TO_DEVICE);
		xdr_off += sge_bytes;
		if (ib_dma_mapping_error(rdma->sc_cm_id->device,
					 ctxt->sge[sge_no].addr))
			goto err;
		atomic_inc(&rdma->sc_dma_used);
		ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
508
		ctxt->sge[sge_no].length = sge_bytes;
T
Tom Tucker 已提交
509
	}
510 511 512 513
	if (byte_count != 0) {
		pr_err("svcrdma: Could not map %d bytes\n", byte_count);
		goto err;
	}
T
Tom Tucker 已提交
514 515 516 517 518

	/* Save all respages in the ctxt and remove them from the
	 * respages array. They are our pages until the I/O
	 * completes.
	 */
519 520
	pages = rqstp->rq_next_page - rqstp->rq_respages;
	for (page_no = 0; page_no < pages; page_no++) {
T
Tom Tucker 已提交
521 522 523
		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
		ctxt->count++;
		rqstp->rq_respages[page_no] = NULL;
524 525 526 527 528
		/*
		 * If there are more pages than SGE, terminate SGE
		 * list so that svc_rdma_unmap_dma doesn't attempt to
		 * unmap garbage.
		 */
529 530
		if (page_no+1 >= sge_no)
			ctxt->sge[page_no+1].length = 0;
T
Tom Tucker 已提交
531
	}
T
Tom Tucker 已提交
532
	rqstp->rq_next_page = rqstp->rq_respages + 1;
S
Steve Wise 已提交
533

534 535 536 537 538 539 540
	/* The loop above bumps sc_dma_used for each sge. The
	 * xdr_buf.tail gets a separate sge, but resides in the
	 * same page as xdr_buf.head. Don't count it twice.
	 */
	if (sge_no > ctxt->count)
		atomic_dec(&rdma->sc_dma_used);

541 542 543 544
	if (sge_no > rdma->sc_max_sge) {
		pr_err("svcrdma: Too many sges (%d)\n", sge_no);
		goto err;
	}
T
Tom Tucker 已提交
545 546 547 548 549 550 551 552 553 554
	memset(&send_wr, 0, sizeof send_wr);
	ctxt->wr_op = IB_WR_SEND;
	send_wr.wr_id = (unsigned long)ctxt;
	send_wr.sg_list = ctxt->sge;
	send_wr.num_sge = sge_no;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags =  IB_SEND_SIGNALED;

	ret = svc_rdma_send(rdma, &send_wr);
	if (ret)
555
		goto err;
T
Tom Tucker 已提交
556

557 558 559
	return 0;

 err:
S
Steve Wise 已提交
560
	svc_rdma_unmap_dma(ctxt);
561 562
	svc_rdma_put_context(ctxt, 1);
	return -EIO;
T
Tom Tucker 已提交
563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581
}

/* Intentionally a no-op: the body is empty, so calling this leaves
 * @rqstp untouched. NOTE(review): presumably exists to fill a transport
 * callback slot -- confirm against the xprt ops table.
 */
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}

int svc_rdma_sendto(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);
	struct rpcrdma_msg *rdma_argp;
	struct rpcrdma_msg *rdma_resp;
	struct rpcrdma_write_array *reply_ary;
	enum rpcrdma_proc reply_type;
	int ret;
	int inline_bytes;
	struct page *res_page;
	struct svc_rdma_op_ctxt *ctxt;
582
	struct svc_rdma_req_map *vec;
T
Tom Tucker 已提交
583 584 585

	dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);

C
Chuck Lever 已提交
586 587 588 589
	/* Get the RDMA request header. The receive logic always
	 * places this at the start of page 0.
	 */
	rdma_argp = page_address(rqstp->rq_pages[0]);
T
Tom Tucker 已提交
590

591
	/* Build an req vec for the XDR */
T
Tom Tucker 已提交
592 593
	ctxt = svc_rdma_get_context(rdma);
	ctxt->direction = DMA_TO_DEVICE;
594
	vec = svc_rdma_get_req_map();
595 596 597
	ret = map_xdr(rdma, &rqstp->rq_res, vec);
	if (ret)
		goto err0;
T
Tom Tucker 已提交
598 599 600
	inline_bytes = rqstp->rq_res.len;

	/* Create the RDMA response header */
601
	res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
T
Tom Tucker 已提交
602 603 604 605 606 607 608 609 610 611 612
	rdma_resp = page_address(res_page);
	reply_ary = svc_rdma_get_reply_array(rdma_argp);
	if (reply_ary)
		reply_type = RDMA_NOMSG;
	else
		reply_type = RDMA_MSG;
	svc_rdma_xdr_encode_reply_header(rdma, rdma_argp,
					 rdma_resp, reply_type);

	/* Send any write-chunk data and build resp write-list */
	ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
613
				rqstp, vec);
T
Tom Tucker 已提交
614 615 616
	if (ret < 0) {
		printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
		       ret);
617
		goto err1;
T
Tom Tucker 已提交
618 619 620 621 622
	}
	inline_bytes -= ret;

	/* Send any reply-list data and update resp reply-list */
	ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
623
				rqstp, vec);
T
Tom Tucker 已提交
624 625 626
	if (ret < 0) {
		printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
		       ret);
627
		goto err1;
T
Tom Tucker 已提交
628 629 630
	}
	inline_bytes -= ret;

631
	ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
T
Tom Tucker 已提交
632
			 inline_bytes);
633
	svc_rdma_put_req_map(vec);
T
Tom Tucker 已提交
634 635
	dprintk("svcrdma: send_reply returns %d\n", ret);
	return ret;
636 637 638 639

 err1:
	put_page(res_page);
 err0:
640
	svc_rdma_put_req_map(vec);
T
Tom Tucker 已提交
641 642 643
	svc_rdma_put_context(ctxt, 0);
	return ret;
}