/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>

#include "iscsi_iser.h"

#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
43

44 45 46
/**
 * iser_start_rdma_unaligned_sg
 */
47
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
48 49
					struct iser_data_buf *data,
					struct iser_data_buf *data_copy,
50
					enum iser_data_dir cmd_dir)
51
{
S
Sagi Grimberg 已提交
52
	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
53 54
	struct scatterlist *sgl = (struct scatterlist *)data->buf;
	struct scatterlist *sg;
55
	char *mem = NULL;
56 57 58 59 60
	unsigned long  cmd_data_len = 0;
	int dma_nents, i;

	for_each_sg(sgl, sg, data->size, i)
		cmd_data_len += ib_sg_dma_len(dev, sg);
61 62

	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
O
Or Gerlitz 已提交
63
		mem = (void *)__get_free_pages(GFP_ATOMIC,
64
		      ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
65
	else
O
Or Gerlitz 已提交
66
		mem = kmalloc(cmd_data_len, GFP_ATOMIC);
67 68 69

	if (mem == NULL) {
		iser_err("Failed to allocate mem size %d %d for copying sglist\n",
70
			 data->size, (int)cmd_data_len);
71 72 73 74 75 76 77
		return -ENOMEM;
	}

	if (cmd_dir == ISER_DIR_OUT) {
		/* copy the unaligned sg the buffer which is used for RDMA */
		char *p, *from;

78
		sgl = (struct scatterlist *)data->buf;
J
Jens Axboe 已提交
79 80
		p = mem;
		for_each_sg(sgl, sg, data->size, i) {
81
			from = kmap_atomic(sg_page(sg));
82
			memcpy(p,
J
Jens Axboe 已提交
83 84
			       from + sg->offset,
			       sg->length);
85
			kunmap_atomic(from);
J
Jens Axboe 已提交
86
			p += sg->length;
87 88 89
		}
	}

90 91 92 93
	sg_init_one(&data_copy->sg_single, mem, cmd_data_len);
	data_copy->buf = &data_copy->sg_single;
	data_copy->size = 1;
	data_copy->copy_buf = mem;
94

95
	dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1,
96 97
				  (cmd_dir == ISER_DIR_OUT) ?
				  DMA_TO_DEVICE : DMA_FROM_DEVICE);
98 99
	BUG_ON(dma_nents == 0);

100 101 102
	data_copy->dma_nents = dma_nents;
	data_copy->data_len = cmd_data_len;

103 104 105 106 107 108
	return 0;
}

/**
 * iser_finalize_rdma_unaligned_sg
 */
109

110
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
111 112 113
				     struct iser_data_buf *data,
				     struct iser_data_buf *data_copy,
				     enum iser_data_dir cmd_dir)
114
{
115
	struct ib_device *dev;
116 117
	unsigned long  cmd_data_len;

S
Sagi Grimberg 已提交
118
	dev = iser_task->iser_conn->ib_conn.device->ib_device;
119

120
	ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,
121 122
			(cmd_dir == ISER_DIR_OUT) ?
			DMA_TO_DEVICE : DMA_FROM_DEVICE);
123 124 125

	if (cmd_dir == ISER_DIR_IN) {
		char *mem;
J
Jens Axboe 已提交
126
		struct scatterlist *sgl, *sg;
127 128 129 130 131
		unsigned char *p, *to;
		unsigned int sg_size;
		int i;

		/* copy back read RDMA to unaligned sg */
132
		mem = data_copy->copy_buf;
133

134 135
		sgl = (struct scatterlist *)data->buf;
		sg_size = data->size;
136

J
Jens Axboe 已提交
137 138
		p = mem;
		for_each_sg(sgl, sg, sg_size, i) {
139
			to = kmap_atomic(sg_page(sg));
J
Jens Axboe 已提交
140
			memcpy(to + sg->offset,
141
			       p,
J
Jens Axboe 已提交
142
			       sg->length);
143
			kunmap_atomic(to);
J
Jens Axboe 已提交
144
			p += sg->length;
145 146 147
		}
	}

148
	cmd_data_len = data->data_len;
149 150

	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
151
		free_pages((unsigned long)data_copy->copy_buf,
152
			   ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
153
	else
154
		kfree(data_copy->copy_buf);
155

156
	data_copy->copy_buf = NULL;
157 158
}

/*
 * True when the low bits selected by ~MASK_4K are all zero in addr.
 * The argument is parenthesized so the cast applies to the whole expression.
 */
#define IS_4K_ALIGNED(addr)	((((unsigned long)(addr)) & ~MASK_4K) == 0)

161 162 163 164 165 166 167 168 169 170 171 172
/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of resulting physical address array (may be less than
 * the original due to possible compaction).
 *
 * we build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other then the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages. The code supports also the weird case
 * where --few fragments of the same page-- are present in the SG as
 * consecutive elements. Also, it handles one entry SG.
 */
173

174
static int iser_sg_to_page_vec(struct iser_data_buf *data,
175 176
			       struct ib_device *ibdev, u64 *pages,
			       int *offset, int *data_size)
177
{
178 179
	struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
	u64 start_addr, end_addr, page, chunk_start = 0;
180
	unsigned long total_sz = 0;
181 182
	unsigned int dma_len;
	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
183 184

	/* compute the offset of first element */
185
	*offset = (u64) sgl[0].offset & ~MASK_4K;
186

187 188
	new_chunk = 1;
	cur_page  = 0;
J
Jens Axboe 已提交
189
	for_each_sg(sgl, sg, data->dma_nents, i) {
190 191 192 193 194
		start_addr = ib_sg_dma_address(ibdev, sg);
		if (new_chunk)
			chunk_start = start_addr;
		dma_len = ib_sg_dma_len(ibdev, sg);
		end_addr = start_addr + dma_len;
195
		total_sz += dma_len;
196

197 198 199 200
		/* collect page fragments until aligned or end of SG list */
		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
			new_chunk = 0;
			continue;
201
		}
202 203 204 205 206 207 208
		new_chunk = 1;

		/* address of the first page in the contiguous chunk;
		   masking relevant for the very first SG entry,
		   which might be unaligned */
		page = chunk_start & MASK_4K;
		do {
209
			pages[cur_page++] = page;
210
			page += SIZE_4K;
211
		} while (page < end_addr);
212
	}
213

214 215 216
	*data_size = total_sz;
	iser_dbg("page_vec->data_size:%d cur_page %d\n",
		 *data_size, cur_page);
217 218 219 220 221 222 223 224 225 226
	return cur_page;
}


/**
 * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
 * for RDMA sub-list of a scatter-gather list of memory buffers, and  returns
 * the number of entries which are aligned correctly. Supports the case where
 * consecutive SG elements are actually fragments of the same physcial page.
 */
227 228
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
				      struct ib_device *ibdev)
229
{
230 231 232 233 234 235
	struct scatterlist *sgl, *sg, *next_sg = NULL;
	u64 start_addr, end_addr;
	int i, ret_len, start_check = 0;

	if (data->dma_nents == 1)
		return 1;
236

J
Jens Axboe 已提交
237
	sgl = (struct scatterlist *)data->buf;
238
	start_addr  = ib_sg_dma_address(ibdev, sgl);
239

J
Jens Axboe 已提交
240
	for_each_sg(sgl, sg, data->dma_nents, i) {
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
		if (start_check && !IS_4K_ALIGNED(start_addr))
			break;

		next_sg = sg_next(sg);
		if (!next_sg)
			break;

		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
		start_addr  = ib_sg_dma_address(ibdev, next_sg);

		if (end_addr == start_addr) {
			start_check = 0;
			continue;
		} else
			start_check = 1;

		if (!IS_4K_ALIGNED(end_addr))
			break;
259
	}
260
	ret_len = (next_sg) ? i : i+1;
261 262 263 264 265
	iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
		 ret_len, data->dma_nents, data);
	return ret_len;
}

266 267
static void iser_data_buf_dump(struct iser_data_buf *data,
			       struct ib_device *ibdev)
268
{
J
Jens Axboe 已提交
269 270
	struct scatterlist *sgl = (struct scatterlist *)data->buf;
	struct scatterlist *sg;
271 272
	int i;

J
Jens Axboe 已提交
273
	for_each_sg(sgl, sg, data->dma_nents, i)
274
		iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
E
Erez Zilber 已提交
275
			 "off:0x%x sz:0x%x dma_len:0x%x\n",
J
Jens Axboe 已提交
276
			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
J
Jens Axboe 已提交
277
			 sg_page(sg), sg->offset,
J
Jens Axboe 已提交
278
			 sg->length, ib_sg_dma_len(ibdev, sg));
279 280 281 282 283 284 285 286 287 288 289 290 291
}

/*
 * iser_dump_page_vec - log a page vector at error level: first its length
 * and data size, then one line per page address.
 */
static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
	int idx = 0;

	iser_err("page vec length %d data size %d\n",
		 page_vec->length, page_vec->data_size);

	while (idx < page_vec->length) {
		iser_err("%d %lx\n",idx,(unsigned long)page_vec->pages[idx]);
		idx++;
	}
}

static void iser_page_vec_build(struct iser_data_buf *data,
292 293
				struct iser_page_vec *page_vec,
				struct ib_device *ibdev)
294 295 296 297 298 299 300
{
	int page_vec_len = 0;

	page_vec->length = 0;
	page_vec->offset = 0;

	iser_dbg("Translating sg sz: %d\n", data->dma_nents);
301 302 303 304
	page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
					   &page_vec->offset,
					   &page_vec->data_size);
	iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);
305 306 307

	page_vec->length = page_vec_len;

308
	if (page_vec_len * SIZE_4K < page_vec->data_size) {
309
		iser_err("page_vec too short to hold this SG\n");
310
		iser_data_buf_dump(data, ibdev);
311 312 313 314 315
		iser_dump_page_vec(page_vec);
		BUG();
	}
}

316 317 318 319
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *data,
			    enum iser_data_dir iser_dir,
			    enum dma_data_direction dma_dir)
320
{
321
	struct ib_device *dev;
322

323
	iser_task->dir[iser_dir] = 1;
S
Sagi Grimberg 已提交
324
	dev = iser_task->iser_conn->ib_conn.device->ib_device;
325

326
	data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);
327 328 329 330 331 332 333
	if (data->dma_nents == 0) {
		iser_err("dma_map_sg failed!!!\n");
		return -EINVAL;
	}
	return 0;
}

334 335
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *data)
336
{
337
	struct ib_device *dev;
338

S
Sagi Grimberg 已提交
339
	dev = iser_task->iser_conn->ib_conn.device->ib_device;
340
	ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE);
341 342
}

343 344
static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
			      struct ib_device *ibdev,
345 346
			      struct iser_data_buf *mem,
			      struct iser_data_buf *mem_copy,
347 348 349
			      enum iser_data_dir cmd_dir,
			      int aligned_len)
{
350
	struct iscsi_conn    *iscsi_conn = iser_task->iser_conn->iscsi_conn;
351 352 353 354 355 356 357 358 359

	iscsi_conn->fmr_unalign_cnt++;
	iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
		  aligned_len, mem->size);

	if (iser_debug_level > 0)
		iser_data_buf_dump(mem, ibdev);

	/* unmap the command data before accessing it */
360
	iser_dma_unmap_task_data(iser_task, mem);
361 362 363

	/* allocate copy buf, if we are writing, copy the */
	/* unaligned scatterlist, dma map the copy        */
364 365
	if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0)
		return -ENOMEM;
366 367 368 369

	return 0;
}

370
/**
371 372
 * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
 * using FMR (if possible) obtaining rkey and va
373 374 375
 *
 * returns 0 on success, errno code on failure
 */
376 377
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
			  enum iser_data_dir cmd_dir)
378
{
S
Sagi Grimberg 已提交
379 380
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device   *device = ib_conn->device;
381
	struct ib_device     *ibdev = device->ib_device;
382
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
383 384 385
	struct iser_regd_buf *regd_buf;
	int aligned_len;
	int err;
E
Erez Zilber 已提交
386
	int i;
387
	struct scatterlist *sg;
388

389
	regd_buf = &iser_task->rdma_regd[cmd_dir];
390

391
	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
392
	if (aligned_len != mem->dma_nents) {
393 394
		err = fall_to_bounce_buf(iser_task, ibdev, mem,
					 &iser_task->data_copy[cmd_dir],
395 396 397 398 399
					 cmd_dir, aligned_len);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
400
		mem = &iser_task->data_copy[cmd_dir];
401 402
	}

403 404 405 406 407 408
	/* if there a single dma entry, FMR is not needed */
	if (mem->dma_nents == 1) {
		sg = (struct scatterlist *)mem->buf;

		regd_buf->reg.lkey = device->mr->lkey;
		regd_buf->reg.rkey = device->mr->rkey;
409 410
		regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
		regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
411 412 413 414 415 416 417 418

		iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
			 "va: 0x%08lX sz: %ld]\n",
			 (unsigned int)regd_buf->reg.lkey,
			 (unsigned int)regd_buf->reg.rkey,
			 (unsigned long)regd_buf->reg.va,
			 (unsigned long)regd_buf->reg.len);
	} else { /* use FMR for multiple dma entries */
S
Sagi Grimberg 已提交
419 420
		iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev);
		err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec,
421
					&regd_buf->reg);
422
		if (err && err != -EAGAIN) {
423
			iser_data_buf_dump(mem, ibdev);
424 425 426
			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
				 mem->dma_nents,
				 ntoh24(iser_task->desc.iscsi_header.dlength));
427
			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
S
Sagi Grimberg 已提交
428 429 430 431
				 ib_conn->fmr.page_vec->data_size,
				 ib_conn->fmr.page_vec->length,
				 ib_conn->fmr.page_vec->offset);
			for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
432
				iser_err("page_vec[%d] = 0x%llx\n", i,
S
Sagi Grimberg 已提交
433
					 (unsigned long long)ib_conn->fmr.page_vec->pages[i]);
E
Erez Zilber 已提交
434
		}
435 436
		if (err)
			return err;
E
Erez Zilber 已提交
437
	}
438 439
	return 0;
}
440

S
Sagi Grimberg 已提交
441
static void
442 443 444
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
		    struct ib_sig_domain *domain)
{
445
	domain->sig_type = IB_SIG_TYPE_T10_DIF;
S
Sagi Grimberg 已提交
446 447
	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
448 449 450 451 452 453 454
	/*
	 * At the moment we hard code those, but in the future
	 * we will take them from sc.
	 */
	domain->sig.dif.apptag_check_mask = 0xffff;
	domain->sig.dif.app_escape = true;
	domain->sig.dif.ref_escape = true;
S
Sagi Grimberg 已提交
455
	if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
456
		domain->sig.dif.ref_remap = true;
457
};
458 459 460 461 462 463 464

static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
	switch (scsi_get_prot_op(sc)) {
	case SCSI_PROT_WRITE_INSERT:
	case SCSI_PROT_READ_STRIP:
465
		sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
466
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
467 468 469 470
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_INSERT:
	case SCSI_PROT_WRITE_STRIP:
471
		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
472
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
S
Sagi Grimberg 已提交
473 474
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
475 476 477
		break;
	case SCSI_PROT_READ_PASS:
	case SCSI_PROT_WRITE_PASS:
478
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
479
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
480
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
S
Sagi Grimberg 已提交
481 482
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
483 484 485 486 487 488
		break;
	default:
		iser_err("Unsupported PI operation %d\n",
			 scsi_get_prot_op(sc));
		return -EINVAL;
	}
489

490 491 492
	return 0;
}

S
Sagi Grimberg 已提交
493
static inline void
494 495
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
S
Sagi Grimberg 已提交
496 497 498 499 500
	*mask = 0;
	if (sc->prot_flags & SCSI_PROT_REF_CHECK)
		*mask |= ISER_CHECK_REFTAG;
	if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
		*mask |= ISER_CHECK_GUARD;
501 502
}

503 504 505 506 507 508 509 510 511 512 513 514 515 516
static void
iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
	u32 rkey;

	memset(inv_wr, 0, sizeof(*inv_wr));
	inv_wr->opcode = IB_WR_LOCAL_INV;
	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
	inv_wr->ex.invalidate_rkey = mr->rkey;

	rkey = ib_inc_rkey(mr->rkey);
	ib_update_fast_reg_key(mr, rkey);
}

517 518 519 520 521
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
		struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
		struct ib_sge *prot_sge, struct ib_sge *sig_sge)
{
S
Sagi Grimberg 已提交
522
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
523 524 525 526 527 528 529 530 531 532 533
	struct iser_pi_context *pi_ctx = desc->pi_ctx;
	struct ib_send_wr sig_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	struct ib_sig_attrs sig_attrs;
	int ret;

	memset(&sig_attrs, 0, sizeof(sig_attrs));
	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
	if (ret)
		goto err;

S
Sagi Grimberg 已提交
534
	iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
535 536

	if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
537
		iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558
		wr = &inv_wr;
	}

	memset(&sig_wr, 0, sizeof(sig_wr));
	sig_wr.opcode = IB_WR_REG_SIG_MR;
	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
	sig_wr.sg_list = data_sge;
	sig_wr.num_sge = 1;
	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
	if (scsi_prot_sg_count(iser_task->sc))
		sig_wr.wr.sig_handover.prot = prot_sge;
	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
					      IB_ACCESS_REMOTE_READ |
					      IB_ACCESS_REMOTE_WRITE;

	if (!wr)
		wr = &sig_wr;
	else
		wr->next = &sig_wr;

S
Sagi Grimberg 已提交
559
	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
560 561 562 563 564 565 566 567
	if (ret) {
		iser_err("reg_sig_mr failed, ret:%d\n", ret);
		goto err;
	}
	desc->reg_indicators &= ~ISER_SIG_KEY_VALID;

	sig_sge->lkey = pi_ctx->sig_mr->lkey;
	sig_sge->addr = 0;
S
Sagi Grimberg 已提交
568
	sig_sge->length = scsi_transfer_length(iser_task->sc);
569 570 571 572 573 574 575 576

	iser_dbg("sig_sge: addr: 0x%llx  length: %u lkey: 0x%x\n",
		 sig_sge->addr, sig_sge->length,
		 sig_sge->lkey);
err:
	return ret;
}

577
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
578
			    struct iser_regd_buf *regd_buf,
579
			    struct iser_data_buf *mem,
580
			    enum iser_reg_indicator ind,
581
			    struct ib_sge *sge)
582
{
583
	struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
S
Sagi Grimberg 已提交
584 585
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
586
	struct ib_device *ibdev = device->ib_device;
587 588
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *frpl;
589 590
	struct ib_send_wr fastreg_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
591 592 593 594 595 596 597 598 599 600 601 602 603 604 605
	int ret, offset, size, plen;

	/* if there a single dma entry, dma mr suffices */
	if (mem->dma_nents == 1) {
		struct scatterlist *sg = (struct scatterlist *)mem->buf;

		sge->lkey = device->mr->lkey;
		sge->addr   = ib_sg_dma_address(ibdev, &sg[0]);
		sge->length  = ib_sg_dma_len(ibdev, &sg[0]);

		iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
			 sge->lkey, sge->addr, sge->length);
		return 0;
	}

606 607 608 609 610 611 612 613 614
	if (ind == ISER_DATA_KEY_VALID) {
		mr = desc->data_mr;
		frpl = desc->data_frpl;
	} else {
		mr = desc->pi_ctx->prot_mr;
		frpl = desc->pi_ctx->prot_frpl;
	}

	plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
615 616 617 618 619
				   &offset, &size);
	if (plen * SIZE_4K < size) {
		iser_err("fast reg page_list too short to hold this SG\n");
		return -EINVAL;
	}
620

621
	if (!(desc->reg_indicators & ind)) {
622
		iser_inv_rkey(&inv_wr, mr);
623 624 625 626 627
		wr = &inv_wr;
	}

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
628
	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
629
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
630 631
	fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
	fastreg_wr.wr.fast_reg.page_list = frpl;
632
	fastreg_wr.wr.fast_reg.page_list_len = plen;
633
	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
634
	fastreg_wr.wr.fast_reg.length = size;
635
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
636 637 638 639
	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
					       IB_ACCESS_REMOTE_WRITE |
					       IB_ACCESS_REMOTE_READ);

640
	if (!wr)
641
		wr = &fastreg_wr;
642
	else
643 644
		wr->next = &fastreg_wr;

S
Sagi Grimberg 已提交
645
	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
646 647 648 649
	if (ret) {
		iser_err("fast registration failed, ret:%d\n", ret);
		return ret;
	}
650
	desc->reg_indicators &= ~ind;
651

652 653
	sge->lkey = mr->lkey;
	sge->addr = frpl->page_list[0] + offset;
654
	sge->length = size;
655 656 657 658 659

	return ret;
}

/**
660
 * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,
661 662 663 664
 * using Fast Registration WR (if possible) obtaining rkey and va
 *
 * returns 0 on success, errno code on failure
 */
665 666
int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
			      enum iser_data_dir cmd_dir)
667
{
S
Sagi Grimberg 已提交
668 669
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
670 671 672
	struct ib_device *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
673 674
	struct fast_reg_descriptor *desc = NULL;
	struct ib_sge data_sge;
675 676 677 678 679
	int err, aligned_len;
	unsigned long flags;

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
680 681
		err = fall_to_bounce_buf(iser_task, ibdev, mem,
					 &iser_task->data_copy[cmd_dir],
682 683 684 685 686 687 688 689
					 cmd_dir, aligned_len);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
		mem = &iser_task->data_copy[cmd_dir];
	}

690 691
	if (mem->dma_nents != 1 ||
	    scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
S
Sagi Grimberg 已提交
692 693
		spin_lock_irqsave(&ib_conn->lock, flags);
		desc = list_first_entry(&ib_conn->fastreg.pool,
694 695
					struct fast_reg_descriptor, list);
		list_del(&desc->list);
S
Sagi Grimberg 已提交
696
		spin_unlock_irqrestore(&ib_conn->lock, flags);
697 698
		regd_buf->reg.mem_h = desc;
	}
699

700 701
	err = iser_fast_reg_mr(iser_task, regd_buf, mem,
			       ISER_DATA_KEY_VALID, &data_sge);
702 703 704
	if (err)
		goto err_reg;

705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740
	if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		struct ib_sge prot_sge, sig_sge;

		memset(&prot_sge, 0, sizeof(prot_sge));
		if (scsi_prot_sg_count(iser_task->sc)) {
			mem = &iser_task->prot[cmd_dir];
			aligned_len = iser_data_buf_aligned_len(mem, ibdev);
			if (aligned_len != mem->dma_nents) {
				err = fall_to_bounce_buf(iser_task, ibdev, mem,
							 &iser_task->prot_copy[cmd_dir],
							 cmd_dir, aligned_len);
				if (err) {
					iser_err("failed to allocate bounce buffer\n");
					return err;
				}
				mem = &iser_task->prot_copy[cmd_dir];
			}

			err = iser_fast_reg_mr(iser_task, regd_buf, mem,
					       ISER_PROT_KEY_VALID, &prot_sge);
			if (err)
				goto err_reg;
		}

		err = iser_reg_sig_mr(iser_task, desc, &data_sge,
				      &prot_sge, &sig_sge);
		if (err) {
			iser_err("Failed to register signature mr\n");
			return err;
		}
		desc->reg_indicators |= ISER_FASTREG_PROTECTED;

		regd_buf->reg.lkey = sig_sge.lkey;
		regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
		regd_buf->reg.va = sig_sge.addr;
		regd_buf->reg.len = sig_sge.length;
741
	} else {
742
		if (desc)
743
			regd_buf->reg.rkey = desc->data_mr->rkey;
744
		else
745
			regd_buf->reg.rkey = device->mr->rkey;
746

747 748 749 750
		regd_buf->reg.lkey = data_sge.lkey;
		regd_buf->reg.va = data_sge.addr;
		regd_buf->reg.len = data_sge.length;
	}
751

752 753
	return 0;
err_reg:
754
	if (desc) {
S
Sagi Grimberg 已提交
755 756 757
		spin_lock_irqsave(&ib_conn->lock, flags);
		list_add_tail(&desc->list, &ib_conn->fastreg.pool);
		spin_unlock_irqrestore(&ib_conn->lock, flags);
758 759
	}

760 761
	return err;
}