/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>

#include "iscsi_iser.h"

#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */

/**
 * iser_start_rdma_unaligned_sg
 */
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
					struct iser_data_buf *data,
					struct iser_data_buf *data_copy,
					enum iser_data_dir cmd_dir)
{
	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
	struct scatterlist *sgl = (struct scatterlist *)data->buf;
	struct scatterlist *sg;
	char *mem = NULL;
	unsigned long  cmd_data_len = data->data_len;
	int dma_nents, i;

	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
		mem = (void *)__get_free_pages(GFP_ATOMIC,
		      ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
	else
		mem = kmalloc(cmd_data_len, GFP_ATOMIC);

	if (mem == NULL) {
		iser_err("Failed to allocate mem size %d %d for copying sglist\n",
			 data->size, (int)cmd_data_len);
		return -ENOMEM;
	}

	if (cmd_dir == ISER_DIR_OUT) {
		/* copy the unaligned sg into the buffer which is used for RDMA */
		char *p, *from;

		sgl = (struct scatterlist *)data->buf;
		p = mem;
		for_each_sg(sgl, sg, data->size, i) {
			from = kmap_atomic(sg_page(sg));
			memcpy(p,
			       from + sg->offset,
			       sg->length);
			kunmap_atomic(from);
			p += sg->length;
		}
	}

	sg_init_one(&data_copy->sg_single, mem, cmd_data_len);
	data_copy->buf = &data_copy->sg_single;
	data_copy->size = 1;
	data_copy->copy_buf = mem;

	dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1,
				  (cmd_dir == ISER_DIR_OUT) ?
				  DMA_TO_DEVICE : DMA_FROM_DEVICE);
	BUG_ON(dma_nents == 0);

	data_copy->dma_nents = dma_nents;
	data_copy->data_len = cmd_data_len;

	return 0;
}
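
/*
 * Worked example (illustrative only, assuming 4K pages, i.e. PAGE_SHIFT == 12):
 * for cmd_data_len = 0x30000 (192K), which exceeds ISER_KMALLOC_THRESHOLD
 * (128K), the bounce buffer above comes from __get_free_pages() with order
 * ilog2(roundup_pow_of_two(0x30000)) - PAGE_SHIFT = 18 - 12 = 6, i.e. 64
 * contiguous pages (256K). Lengths of 128K or less use a plain kmalloc().
 */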

/**
 * iser_finalize_rdma_unaligned_sg
 */

void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
				     struct iser_data_buf *data,
				     struct iser_data_buf *data_copy,
				     enum iser_data_dir cmd_dir)
{
	struct ib_device *dev;
	unsigned long  cmd_data_len;

	dev = iser_task->iser_conn->ib_conn.device->ib_device;

	ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,
			(cmd_dir == ISER_DIR_OUT) ?
			DMA_TO_DEVICE : DMA_FROM_DEVICE);

	if (cmd_dir == ISER_DIR_IN) {
		char *mem;
		struct scatterlist *sgl, *sg;
		unsigned char *p, *to;
		unsigned int sg_size;
		int i;

		/* copy back read RDMA to unaligned sg */
		mem = data_copy->copy_buf;

		sgl = (struct scatterlist *)data->buf;
		sg_size = data->size;

		p = mem;
		for_each_sg(sgl, sg, sg_size, i) {
			to = kmap_atomic(sg_page(sg));
			memcpy(to + sg->offset,
			       p,
			       sg->length);
			kunmap_atomic(to);
			p += sg->length;
		}
	}

	cmd_data_len = data->data_len;

	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
		free_pages((unsigned long)data_copy->copy_buf,
			   ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
	else
		kfree(data_copy->copy_buf);

	data_copy->copy_buf = NULL;
}

#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)

/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of resulting physical address array (may be less than
 * the original due to possible compaction).
 *
 * We build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages. The code also supports the case where
 * several fragments of the same page are present in the SG as consecutive
 * elements. A single-entry SG is handled as well.
 */

static int iser_sg_to_page_vec(struct iser_data_buf *data,
			       struct ib_device *ibdev, u64 *pages,
			       int *offset, int *data_size)
{
	struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
	u64 start_addr, end_addr, page, chunk_start = 0;
	unsigned long total_sz = 0;
	unsigned int dma_len;
	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;

	/* compute the offset of first element */
	*offset = (u64) sgl[0].offset & ~MASK_4K;

	new_chunk = 1;
	cur_page  = 0;
	for_each_sg(sgl, sg, data->dma_nents, i) {
		start_addr = ib_sg_dma_address(ibdev, sg);
		if (new_chunk)
			chunk_start = start_addr;
		dma_len = ib_sg_dma_len(ibdev, sg);
		end_addr = start_addr + dma_len;
		total_sz += dma_len;

		/* collect page fragments until aligned or end of SG list */
		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
			new_chunk = 0;
			continue;
		}
		new_chunk = 1;

		/* address of the first page in the contiguous chunk;
		   masking relevant for the very first SG entry,
		   which might be unaligned */
		page = chunk_start & MASK_4K;
		do {
			pages[cur_page++] = page;
			page += SIZE_4K;
		} while (page < end_addr);
	}

	*data_size = total_sz;
	iser_dbg("page_vec->data_size:%d cur_page %d\n",
		 *data_size, cur_page);
	return cur_page;
}
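
/*
 * Worked example (illustrative addresses only): for a dma-mapped SG list of
 * three entries that satisfies the RDMA alignment rules, e.g.
 *   sg[0]: addr 0x10000800, len 0x0800  (ends on a 4K boundary)
 *   sg[1]: addr 0x10001000, len 0x1000  (starts and ends 4K aligned)
 *   sg[2]: addr 0x10002000, len 0x0200  (last entry, may end unaligned)
 * the routine above yields pages[] = { 0x10000000, 0x10001000, 0x10002000 },
 * *offset = 0x800, *data_size = 0x1a00 and a return value of 3.
 */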


/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers that is correctly aligned for RDMA,
 * and returns the number of correctly aligned entries. Supports the case
 * where consecutive SG elements are actually fragments of the same physical
 * page.
 */
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
				      struct ib_device *ibdev)
{
	struct scatterlist *sgl, *sg, *next_sg = NULL;
	u64 start_addr, end_addr;
	int i, ret_len, start_check = 0;

	if (data->dma_nents == 1)
		return 1;

	sgl = (struct scatterlist *)data->buf;
	start_addr  = ib_sg_dma_address(ibdev, sgl);

	for_each_sg(sgl, sg, data->dma_nents, i) {
		if (start_check && !IS_4K_ALIGNED(start_addr))
			break;

		next_sg = sg_next(sg);
		if (!next_sg)
			break;

		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
		start_addr  = ib_sg_dma_address(ibdev, next_sg);

		if (end_addr == start_addr) {
			start_check = 0;
			continue;
		} else
			start_check = 1;

		if (!IS_4K_ALIGNED(end_addr))
			break;
	}
	ret_len = (next_sg) ? i : i+1;
	iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
		 ret_len, data->dma_nents, data);
	return ret_len;
}
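
/*
 * Worked example (illustrative addresses only): for a dma-mapped SG list
 *   sg[0]: addr 0x20000000, len 0x1000
 *   sg[1]: addr 0x20001000, len 0x0800  (ends at 0x20001800, not 4K aligned)
 *   sg[2]: addr 0x30000000, len 0x1000
 * entry 1 ends mid-page while entry 2 starts on a different page, so only
 * the leading entry is counted and the function returns 1. Since that is
 * not equal to data->dma_nents (3), the callers below fall back to a bounce
 * buffer via fall_to_bounce_buf().
 */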

static void iser_data_buf_dump(struct iser_data_buf *data,
			       struct ib_device *ibdev)
{
	struct scatterlist *sgl = (struct scatterlist *)data->buf;
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, data->dma_nents, i)
		iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
			 "off:0x%x sz:0x%x dma_len:0x%x\n",
			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
			 sg_page(sg), sg->offset,
			 sg->length, ib_sg_dma_len(ibdev, sg));
}

static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
	int i;

	iser_err("page vec length %d data size %d\n",
		 page_vec->length, page_vec->data_size);
	for (i = 0; i < page_vec->length; i++)
		iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
}

static void iser_page_vec_build(struct iser_data_buf *data,
				struct iser_page_vec *page_vec,
				struct ib_device *ibdev)
{
	int page_vec_len = 0;

	page_vec->length = 0;
	page_vec->offset = 0;

	iser_dbg("Translating sg sz: %d\n", data->dma_nents);
	page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
					   &page_vec->offset,
					   &page_vec->data_size);
	iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);

	page_vec->length = page_vec_len;

	if (page_vec_len * SIZE_4K < page_vec->data_size) {
		iser_err("page_vec too short to hold this SG\n");
		iser_data_buf_dump(data, ibdev);
		iser_dump_page_vec(page_vec);
		BUG();
	}
}

int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *data,
			    enum iser_data_dir iser_dir,
			    enum dma_data_direction dma_dir)
{
	struct ib_device *dev;

	iser_task->dir[iser_dir] = 1;
	dev = iser_task->iser_conn->ib_conn.device->ib_device;

	data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);
	if (data->dma_nents == 0) {
		iser_err("dma_map_sg failed!!!\n");
		return -EINVAL;
	}
	return 0;
}

void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *data,
			      enum dma_data_direction dir)
{
	struct ib_device *dev;

	dev = iser_task->iser_conn->ib_conn.device->ib_device;
	ib_dma_unmap_sg(dev, data->buf, data->size, dir);
}

static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
			      struct ib_device *ibdev,
			      struct iser_data_buf *mem,
			      struct iser_data_buf *mem_copy,
			      enum iser_data_dir cmd_dir,
			      int aligned_len)
{
	struct iscsi_conn    *iscsi_conn = iser_task->iser_conn->iscsi_conn;

	iscsi_conn->fmr_unalign_cnt++;
	iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
		  aligned_len, mem->size);

	if (iser_debug_level > 0)
		iser_data_buf_dump(mem, ibdev);

	/* unmap the command data before accessing it */
	iser_dma_unmap_task_data(iser_task, mem,
				 (cmd_dir == ISER_DIR_OUT) ?
				 DMA_TO_DEVICE : DMA_FROM_DEVICE);

	/* allocate the copy buffer; if we are writing,
	 * copy the unaligned scatterlist and dma map the copy
	 */
	if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0)
		return -ENOMEM;

	return 0;
}

/**
 * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
 * using FMR (if possible) obtaining rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
			  enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device   *device = ib_conn->device;
	struct ib_device     *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_regd_buf *regd_buf;
	int aligned_len;
	int err;
	int i;
	struct scatterlist *sg;

	regd_buf = &iser_task->rdma_regd[cmd_dir];

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(iser_task, ibdev, mem,
					 &iser_task->data_copy[cmd_dir],
					 cmd_dir, aligned_len);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
		mem = &iser_task->data_copy[cmd_dir];
	}

	/* if there is a single dma entry, FMR is not needed */
	if (mem->dma_nents == 1) {
		sg = (struct scatterlist *)mem->buf;

		regd_buf->reg.lkey = device->mr->lkey;
		regd_buf->reg.rkey = device->mr->rkey;
		regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
		regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);

		iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
			 "va: 0x%08lX sz: %ld]\n",
			 (unsigned int)regd_buf->reg.lkey,
			 (unsigned int)regd_buf->reg.rkey,
			 (unsigned long)regd_buf->reg.va,
			 (unsigned long)regd_buf->reg.len);
	} else { /* use FMR for multiple dma entries */
		iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev);
		err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec,
					&regd_buf->reg);
		if (err && err != -EAGAIN) {
			iser_data_buf_dump(mem, ibdev);
			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
				 mem->dma_nents,
				 ntoh24(iser_task->desc.iscsi_header.dlength));
			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
				 ib_conn->fmr.page_vec->data_size,
				 ib_conn->fmr.page_vec->length,
				 ib_conn->fmr.page_vec->offset);
			for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
				iser_err("page_vec[%d] = 0x%llx\n", i,
					 (unsigned long long)ib_conn->fmr.page_vec->pages[i]);
		}
		if (err)
			return err;
	}
	return 0;
}

static void
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
		    struct ib_sig_domain *domain)
{
	domain->sig_type = IB_SIG_TYPE_T10_DIF;
	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
	/*
	 * At the moment we hard code those, but in the future
	 * we will take them from sc.
	 */
	domain->sig.dif.apptag_check_mask = 0xffff;
	domain->sig.dif.app_escape = true;
	domain->sig.dif.ref_escape = true;
	if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
		domain->sig.dif.ref_remap = true;
}

static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
	switch (scsi_get_prot_op(sc)) {
	case SCSI_PROT_WRITE_INSERT:
	case SCSI_PROT_READ_STRIP:
		sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_INSERT:
	case SCSI_PROT_WRITE_STRIP:
		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_PASS:
	case SCSI_PROT_WRITE_PASS:
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	default:
		iser_err("Unsupported PI operation %d\n",
			 scsi_get_prot_op(sc));
		return -EINVAL;
	}

	return 0;
}
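
/*
 * Note (added for clarity): in the mapping above, WRITE_INSERT/READ_STRIP
 * carry protection information only on the wire, so the memory domain is
 * IB_SIG_TYPE_NONE; READ_INSERT/WRITE_STRIP are the mirror case where only
 * the memory domain carries PI; READ_PASS/WRITE_PASS keep PI on both sides.
 * The wire domain always uses a T10-DIF CRC guard, while the memory domain
 * may use an IP checksum guard when SCSI_PROT_IP_CHECKSUM is set.
 */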

static inline void
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
	*mask = 0;
	if (sc->prot_flags & SCSI_PROT_REF_CHECK)
		*mask |= ISER_CHECK_REFTAG;
	if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
		*mask |= ISER_CHECK_GUARD;
}

static void
iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
	u32 rkey;

	memset(inv_wr, 0, sizeof(*inv_wr));
	inv_wr->opcode = IB_WR_LOCAL_INV;
	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
	inv_wr->ex.invalidate_rkey = mr->rkey;

	rkey = ib_inc_rkey(mr->rkey);
	ib_update_fast_reg_key(mr, rkey);
}
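
/*
 * Note (added for clarity): the helper above fills in a LOCAL_INV work
 * request for the caller to post, then bumps the 8-bit key portion of the
 * rkey with ib_inc_rkey() and installs it via ib_update_fast_reg_key(), so
 * the next fast registration advertises a fresh rkey instead of reusing a
 * stale one.
 */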

static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
		struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
		struct ib_sge *prot_sge, struct ib_sge *sig_sge)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_pi_context *pi_ctx = desc->pi_ctx;
	struct ib_send_wr sig_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	struct ib_sig_attrs sig_attrs;
	int ret;

	memset(&sig_attrs, 0, sizeof(sig_attrs));
	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
	if (ret)
		goto err;

	iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);

	if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
		iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
		wr = &inv_wr;
	}

	memset(&sig_wr, 0, sizeof(sig_wr));
	sig_wr.opcode = IB_WR_REG_SIG_MR;
	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
	sig_wr.sg_list = data_sge;
	sig_wr.num_sge = 1;
	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
	if (scsi_prot_sg_count(iser_task->sc))
		sig_wr.wr.sig_handover.prot = prot_sge;
	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
					      IB_ACCESS_REMOTE_READ |
					      IB_ACCESS_REMOTE_WRITE;

	if (!wr)
		wr = &sig_wr;
	else
		wr->next = &sig_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("reg_sig_mr failed, ret:%d\n", ret);
		goto err;
	}
	desc->reg_indicators &= ~ISER_SIG_KEY_VALID;

	sig_sge->lkey = pi_ctx->sig_mr->lkey;
	sig_sge->addr = 0;
	sig_sge->length = scsi_transfer_length(iser_task->sc);

	iser_dbg("sig_sge: addr: 0x%llx  length: %u lkey: 0x%x\n",
		 sig_sge->addr, sig_sge->length,
		 sig_sge->lkey);
err:
	return ret;
}

static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
			    struct iser_regd_buf *regd_buf,
			    struct iser_data_buf *mem,
			    enum iser_reg_indicator ind,
			    struct ib_sge *sge)
{
	struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_device *ibdev = device->ib_device;
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *frpl;
	struct ib_send_wr fastreg_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	int ret, offset, size, plen;

	/* if there is a single dma entry, the dma mr suffices */
	if (mem->dma_nents == 1) {
		struct scatterlist *sg = (struct scatterlist *)mem->buf;

		sge->lkey = device->mr->lkey;
		sge->addr   = ib_sg_dma_address(ibdev, &sg[0]);
		sge->length  = ib_sg_dma_len(ibdev, &sg[0]);

		iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
			 sge->lkey, sge->addr, sge->length);
		return 0;
	}

	if (ind == ISER_DATA_KEY_VALID) {
		mr = desc->data_mr;
		frpl = desc->data_frpl;
	} else {
		mr = desc->pi_ctx->prot_mr;
		frpl = desc->pi_ctx->prot_frpl;
	}

	plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
				   &offset, &size);
	if (plen * SIZE_4K < size) {
		iser_err("fast reg page_list too short to hold this SG\n");
		return -EINVAL;
	}

	if (!(desc->reg_indicators & ind)) {
		iser_inv_rkey(&inv_wr, mr);
		wr = &inv_wr;
	}

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
	fastreg_wr.wr.fast_reg.page_list = frpl;
	fastreg_wr.wr.fast_reg.page_list_len = plen;
	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
	fastreg_wr.wr.fast_reg.length = size;
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
					       IB_ACCESS_REMOTE_WRITE |
					       IB_ACCESS_REMOTE_READ);

	if (!wr)
		wr = &fastreg_wr;
	else
		wr->next = &fastreg_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("fast registration failed, ret:%d\n", ret);
		return ret;
	}
	desc->reg_indicators &= ~ind;

	sge->lkey = mr->lkey;
	sge->addr = frpl->page_list[0] + offset;
	sge->length = size;

	return ret;
}

/**
 * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,
 * using Fast Registration WR (if possible) obtaining rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
			      enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_device *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
	struct fast_reg_descriptor *desc = NULL;
	struct ib_sge data_sge;
	int err, aligned_len;
	unsigned long flags;

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(iser_task, ibdev, mem,
					 &iser_task->data_copy[cmd_dir],
					 cmd_dir, aligned_len);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
		mem = &iser_task->data_copy[cmd_dir];
	}

	if (mem->dma_nents != 1 ||
	    scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		spin_lock_irqsave(&ib_conn->lock, flags);
		desc = list_first_entry(&ib_conn->fastreg.pool,
					struct fast_reg_descriptor, list);
		list_del(&desc->list);
		spin_unlock_irqrestore(&ib_conn->lock, flags);
		regd_buf->reg.mem_h = desc;
	}

	err = iser_fast_reg_mr(iser_task, regd_buf, mem,
			       ISER_DATA_KEY_VALID, &data_sge);
	if (err)
		goto err_reg;

	if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		struct ib_sge prot_sge, sig_sge;

		memset(&prot_sge, 0, sizeof(prot_sge));
		if (scsi_prot_sg_count(iser_task->sc)) {
			mem = &iser_task->prot[cmd_dir];
			aligned_len = iser_data_buf_aligned_len(mem, ibdev);
			if (aligned_len != mem->dma_nents) {
				err = fall_to_bounce_buf(iser_task, ibdev, mem,
							 &iser_task->prot_copy[cmd_dir],
							 cmd_dir, aligned_len);
				if (err) {
					iser_err("failed to allocate bounce buffer\n");
					return err;
				}
				mem = &iser_task->prot_copy[cmd_dir];
			}

			err = iser_fast_reg_mr(iser_task, regd_buf, mem,
					       ISER_PROT_KEY_VALID, &prot_sge);
			if (err)
				goto err_reg;
		}

		err = iser_reg_sig_mr(iser_task, desc, &data_sge,
				      &prot_sge, &sig_sge);
		if (err) {
			iser_err("Failed to register signature mr\n");
			return err;
		}
		desc->reg_indicators |= ISER_FASTREG_PROTECTED;

		regd_buf->reg.lkey = sig_sge.lkey;
		regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
		regd_buf->reg.va = sig_sge.addr;
		regd_buf->reg.len = sig_sge.length;
	} else {
		if (desc)
			regd_buf->reg.rkey = desc->data_mr->rkey;
		else
			regd_buf->reg.rkey = device->mr->rkey;

		regd_buf->reg.lkey = data_sge.lkey;
		regd_buf->reg.va = data_sge.addr;
		regd_buf->reg.len = data_sge.length;
	}

	return 0;
err_reg:
	if (desc) {
		spin_lock_irqsave(&ib_conn->lock, flags);
		list_add_tail(&desc->list, &ib_conn->fastreg.pool);
		spin_unlock_irqrestore(&ib_conn->lock, flags);
	}

	return err;
}