iser_memory.c
/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>

#include "iscsi_iser.h"

#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
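/* Take an unused fast registration descriptor from the connection's pool;
 * it is returned to the pool with iser_reg_desc_put().
 */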
struct fast_reg_descriptor *
iser_reg_desc_get(struct ib_conn *ib_conn)
{
	struct fast_reg_descriptor *desc;
	unsigned long flags;

	spin_lock_irqsave(&ib_conn->lock, flags);
	desc = list_first_entry(&ib_conn->fastreg.pool,
				struct fast_reg_descriptor, list);
	list_del(&desc->list);
	spin_unlock_irqrestore(&ib_conn->lock, flags);

	return desc;
}

void
iser_reg_desc_put(struct ib_conn *ib_conn,
		  struct fast_reg_descriptor *desc)
{
	unsigned long flags;

	spin_lock_irqsave(&ib_conn->lock, flags);
	list_add_tail(&desc->list, &ib_conn->fastreg.pool);
	spin_unlock_irqrestore(&ib_conn->lock, flags);
}

/**
 * iser_start_rdma_unaligned_sg
 */
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
					struct iser_data_buf *data,
					enum iser_data_dir cmd_dir)
{
	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
	struct scatterlist *sgl = data->sg;
	struct scatterlist *sg;
	char *mem = NULL;
	unsigned long  cmd_data_len = data->data_len;
	int dma_nents, i;

	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
		mem = (void *)__get_free_pages(GFP_ATOMIC,
		      ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
	else
		mem = kmalloc(cmd_data_len, GFP_ATOMIC);

	if (mem == NULL) {
		iser_err("Failed to allocate mem size %d %d for copying sglist\n",
			 data->size, (int)cmd_data_len);
		return -ENOMEM;
	}

	if (cmd_dir == ISER_DIR_OUT) {
		/* copy the unaligned sg into the buffer which is used for RDMA */
		char *p, *from;

		sgl = data->sg;
		p = mem;
		for_each_sg(sgl, sg, data->size, i) {
			from = kmap_atomic(sg_page(sg));
			memcpy(p,
			       from + sg->offset,
			       sg->length);
			kunmap_atomic(from);
			p += sg->length;
		}
	}

	sg_init_one(&data->sg_single, mem, cmd_data_len);
	data->orig_sg = data->sg;
	data->sg = &data->sg_single;
	data->copy_buf = mem;
	dma_nents = ib_dma_map_sg(dev, data->sg, 1,
				  (cmd_dir == ISER_DIR_OUT) ?
				  DMA_TO_DEVICE : DMA_FROM_DEVICE);
	BUG_ON(dma_nents == 0);

	data->dma_nents = dma_nents;

	return 0;
}

/**
 * iser_finalize_rdma_unaligned_sg
 */
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
				     struct iser_data_buf *data,
				     enum iser_data_dir cmd_dir)
{
	struct ib_device *dev;
	unsigned long  cmd_data_len;

	dev = iser_task->iser_conn->ib_conn.device->ib_device;
	ib_dma_unmap_sg(dev, data->sg, 1,
			(cmd_dir == ISER_DIR_OUT) ?
			DMA_TO_DEVICE : DMA_FROM_DEVICE);

	if (cmd_dir == ISER_DIR_IN) {
		char *mem;
		struct scatterlist *sgl, *sg;
		unsigned char *p, *to;
		unsigned int sg_size;
		int i;

		/* copy back read RDMA to unaligned sg */
		mem = data->copy_buf;
		sgl = data->sg;
		sg_size = data->size;
		p = mem;
		for_each_sg(sgl, sg, sg_size, i) {
			to = kmap_atomic(sg_page(sg));
			memcpy(to + sg->offset,
			       p,
			       sg->length);
			kunmap_atomic(to);
			p += sg->length;
		}
	}

	cmd_data_len = data->data_len;

	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
		free_pages((unsigned long)data->copy_buf,
			   ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
	else
		kfree(data->copy_buf);
	data->copy_buf = NULL;
}

#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)

/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of the resulting physical address array (may be less
 * than the original due to possible compaction).
 *
 * We build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages. The code also supports the case where
 * a few fragments of the same page are present in the SG as consecutive
 * elements, and it handles a single-entry SG as well.
 */
static int iser_sg_to_page_vec(struct iser_data_buf *data,
			       struct ib_device *ibdev, u64 *pages,
			       int *offset, int *data_size)
{
	struct scatterlist *sg, *sgl = data->sg;
	u64 start_addr, end_addr, page, chunk_start = 0;
	unsigned long total_sz = 0;
	unsigned int dma_len;
	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;

	/* compute the offset of first element */
	*offset = (u64) sgl[0].offset & ~MASK_4K;
	new_chunk = 1;
	cur_page  = 0;
	for_each_sg(sgl, sg, data->dma_nents, i) {
		start_addr = ib_sg_dma_address(ibdev, sg);
		if (new_chunk)
			chunk_start = start_addr;
		dma_len = ib_sg_dma_len(ibdev, sg);
		end_addr = start_addr + dma_len;
		total_sz += dma_len;
		/* collect page fragments until aligned or end of SG list */
		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
			new_chunk = 0;
			continue;
		}
		new_chunk = 1;

		/* address of the first page in the contiguous chunk;
		   masking relevant for the very first SG entry,
		   which might be unaligned */
		page = chunk_start & MASK_4K;
		do {
			pages[cur_page++] = page;
			page += SIZE_4K;
		} while (page < end_addr);
	}
	*data_size = total_sz;
	iser_dbg("page_vec->data_size:%d cur_page %d\n",
		 *data_size, cur_page);
	return cur_page;
}


/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers that is correctly aligned for RDMA,
 * and returns the number of entries which are aligned correctly. Supports
 * the case where consecutive SG elements are actually fragments of the same
 * physical page.
 */
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
				      struct ib_device *ibdev)
{
	struct scatterlist *sg, *sgl, *next_sg = NULL;
	u64 start_addr, end_addr;
	int i, ret_len, start_check = 0;

	if (data->dma_nents == 1)
		return 1;
	sgl = data->sg;
	start_addr  = ib_sg_dma_address(ibdev, sgl);
	for_each_sg(sgl, sg, data->dma_nents, i) {
		if (start_check && !IS_4K_ALIGNED(start_addr))
			break;

		next_sg = sg_next(sg);
		if (!next_sg)
			break;

		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
		start_addr  = ib_sg_dma_address(ibdev, next_sg);

		if (end_addr == start_addr) {
			start_check = 0;
			continue;
		} else
			start_check = 1;

		if (!IS_4K_ALIGNED(end_addr))
			break;
	}
	ret_len = (next_sg) ? i : i+1;
	iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
		 ret_len, data->dma_nents, data);
	return ret_len;
}

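/* Dump the mapped SG elements of a data buffer (debug aid) */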
static void iser_data_buf_dump(struct iser_data_buf *data,
			       struct ib_device *ibdev)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(data->sg, sg, data->dma_nents, i)
		iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
			 "off:0x%x sz:0x%x dma_len:0x%x\n",
			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
			 sg_page(sg), sg->offset,
			 sg->length, ib_sg_dma_len(ibdev, sg));
}

static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
	int i;

	iser_err("page vec length %d data size %d\n",
		 page_vec->length, page_vec->data_size);
	for (i = 0; i < page_vec->length; i++)
		iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
}

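/* DMA map the task data scatterlist for the requested direction */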
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *data,
			    enum iser_data_dir iser_dir,
			    enum dma_data_direction dma_dir)
{
	struct ib_device *dev;
	iser_task->dir[iser_dir] = 1;
	dev = iser_task->iser_conn->ib_conn.device->ib_device;
	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
	if (data->dma_nents == 0) {
		iser_err("dma_map_sg failed!!!\n");
		return -EINVAL;
	}
	return 0;
}

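/* Unmap a scatterlist previously mapped by iser_dma_map_task_data() */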
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *data,
			      enum dma_data_direction dir)
{
	struct ib_device *dev;
	dev = iser_task->iser_conn->ib_conn.device->ib_device;
	ib_dma_unmap_sg(dev, data->sg, data->size, dir);
}

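/*
 * The SG list violates the RDMA alignment requirements: unmap it and
 * fall back to a single contiguous bounce buffer, copying the payload
 * into it first when this is a write.
 */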
static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *mem,
			      enum iser_data_dir cmd_dir,
			      int aligned_len)
{
	struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
	struct iser_device *device = iser_task->iser_conn->ib_conn.device;

	iscsi_conn->fmr_unalign_cnt++;
	iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
		  aligned_len, mem->size);

	if (iser_debug_level > 0)
		iser_data_buf_dump(mem, device->ib_device);

	/* unmap the command data before accessing it */
	iser_dma_unmap_task_data(iser_task, mem,
				 (cmd_dir == ISER_DIR_OUT) ?
				 DMA_TO_DEVICE : DMA_FROM_DEVICE);

	/* allocate a copy buffer; if we are writing, copy the
	 * unaligned scatterlist into it, then dma map the copy */
	if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
		return -ENOMEM;

	return 0;
}

/**
 * iser_reg_page_vec - Register physical memory
 *
 * returns: 0 on success, errno code on failure
 */
static
int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
		      struct iser_data_buf *mem,
		      struct iser_page_vec *page_vec,
		      struct iser_mem_reg *mem_reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_pool_fmr *fmr;
	int ret, plen;

	plen = iser_sg_to_page_vec(mem, device->ib_device,
				   page_vec->pages,
				   &page_vec->offset,
				   &page_vec->data_size);
	page_vec->length = plen;
	if (plen * SIZE_4K < page_vec->data_size) {
		iser_err("page vec too short to hold this SG\n");
		iser_data_buf_dump(mem, device->ib_device);
		iser_dump_page_vec(page_vec);
		return -EINVAL;
	}
	fmr  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
				    page_vec->pages,
				    page_vec->length,
				    page_vec->pages[0]);
	if (IS_ERR(fmr)) {
		ret = PTR_ERR(fmr);
		iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
		return ret;
	}

	mem_reg->lkey = fmr->fmr->lkey;
	mem_reg->rkey = fmr->fmr->rkey;
	mem_reg->va = page_vec->pages[0] + page_vec->offset;
	mem_reg->len = page_vec->data_size;
	mem_reg->mem_h = fmr;

	return 0;
}

/**
 * Unregister (previously registered using FMR) memory.
 * If the memory was not registered through FMR, this does nothing.
 */
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
			enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
	int ret;

	if (!reg->mem_h)
		return;

	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

	ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
	if (ret)
		iser_err("ib_fmr_pool_unmap failed %d\n", ret);

	reg->mem_h = NULL;
}

void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
			    enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
	if (!reg->mem_h)
		return;

	iser_reg_desc_put(&iser_task->iser_conn->ib_conn,
			  reg->mem_h);
	reg->mem_h = NULL;
}

/**
 * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
 * using FMR (if possible), obtaining the rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
			  enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device   *device = ib_conn->device;
	struct ib_device     *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_mem_reg *mem_reg;
	int aligned_len;
	int err;
	int i;
	struct scatterlist *sg;
	mem_reg = &iser_task->rdma_reg[cmd_dir];
	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(iser_task, mem,
					 cmd_dir, aligned_len);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
	}

	/* if there is a single dma entry, FMR is not needed */
	if (mem->dma_nents == 1) {
		sg = mem->sg;
		mem_reg->lkey = device->mr->lkey;
		mem_reg->rkey = device->mr->rkey;
		mem_reg->len  = ib_sg_dma_len(ibdev, &sg[0]);
		mem_reg->va   = ib_sg_dma_address(ibdev, &sg[0]);

		iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
			 "va: 0x%08lX sz: %ld]\n",
			 (unsigned int)mem_reg->lkey,
			 (unsigned int)mem_reg->rkey,
			 (unsigned long)mem_reg->va,
			 (unsigned long)mem_reg->len);
	} else { /* use FMR for multiple dma entries */
		err = iser_reg_page_vec(iser_task, mem, ib_conn->fmr.page_vec,
					mem_reg);
		if (err && err != -EAGAIN) {
			iser_data_buf_dump(mem, ibdev);
			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
				 mem->dma_nents,
				 ntoh24(iser_task->desc.iscsi_header.dlength));
			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
				 ib_conn->fmr.page_vec->data_size,
				 ib_conn->fmr.page_vec->length,
				 ib_conn->fmr.page_vec->offset);
			for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
				iser_err("page_vec[%d] = 0x%llx\n", i,
					 (unsigned long long)ib_conn->fmr.page_vec->pages[i]);
		}
		if (err)
			return err;
	}
	return 0;
}
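/*
 * Fill one T10-DIF signature domain (memory or wire side) from the
 * protection settings of the SCSI command.
 */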
static void
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
		    struct ib_sig_domain *domain)
{
	domain->sig_type = IB_SIG_TYPE_T10_DIF;
	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
	/*
	 * At the moment we hard code those, but in the future
	 * we will take them from sc.
	 */
	domain->sig.dif.apptag_check_mask = 0xffff;
	domain->sig.dif.app_escape = true;
	domain->sig.dif.ref_escape = true;
	if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
		domain->sig.dif.ref_remap = true;
};

static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
	switch (scsi_get_prot_op(sc)) {
	case SCSI_PROT_WRITE_INSERT:
	case SCSI_PROT_READ_STRIP:
		sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_INSERT:
	case SCSI_PROT_WRITE_STRIP:
		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_PASS:
	case SCSI_PROT_WRITE_PASS:
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	default:
		iser_err("Unsupported PI operation %d\n",
			 scsi_get_prot_op(sc));
		return -EINVAL;
	}
	return 0;
}

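/* Build the iSER protection check mask from the protection flags
 * of the SCSI command.
 */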
static inline void
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
	*mask = 0;
	if (sc->prot_flags & SCSI_PROT_REF_CHECK)
		*mask |= ISER_CHECK_REFTAG;
	if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
		*mask |= ISER_CHECK_GUARD;
}

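/*
 * Prepare a local invalidate WR for @mr and advance its rkey so the
 * next registration uses a fresh key.
 */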
static void
iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
	u32 rkey;

	memset(inv_wr, 0, sizeof(*inv_wr));
	inv_wr->opcode = IB_WR_LOCAL_INV;
	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
	inv_wr->ex.invalidate_rkey = mr->rkey;

	rkey = ib_inc_rkey(mr->rkey);
	ib_update_fast_reg_key(mr, rkey);
}

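/*
 * Post a REG_SIG_MR work request that binds the data (and, if present,
 * protection) buffers to the signature MR, and describe the result as
 * a single sge.
 */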
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
		struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
		struct ib_sge *prot_sge, struct ib_sge *sig_sge)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_pi_context *pi_ctx = desc->pi_ctx;
	struct ib_send_wr sig_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	struct ib_sig_attrs sig_attrs;
	int ret;

	memset(&sig_attrs, 0, sizeof(sig_attrs));
	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
	if (ret)
		goto err;

	iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);

	if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
		iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
		wr = &inv_wr;
	}

	memset(&sig_wr, 0, sizeof(sig_wr));
	sig_wr.opcode = IB_WR_REG_SIG_MR;
	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
	sig_wr.sg_list = data_sge;
	sig_wr.num_sge = 1;
	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
	if (scsi_prot_sg_count(iser_task->sc))
		sig_wr.wr.sig_handover.prot = prot_sge;
	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
					      IB_ACCESS_REMOTE_READ |
					      IB_ACCESS_REMOTE_WRITE;

	if (!wr)
		wr = &sig_wr;
	else
		wr->next = &sig_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("reg_sig_mr failed, ret:%d\n", ret);
		goto err;
	}
	desc->reg_indicators &= ~ISER_SIG_KEY_VALID;

	sig_sge->lkey = pi_ctx->sig_mr->lkey;
	sig_sge->addr = 0;
S
643 644 645 646 647 648 649 650

	iser_dbg("sig_sge: addr: 0x%llx  length: %u lkey: 0x%x\n",
		 sig_sge->addr, sig_sge->length,
		 sig_sge->lkey);
err:
	return ret;
}

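/*
 * Register @mem with a fast registration WR and describe the registered
 * region in @sge; when there is only a single DMA entry the device DMA MR
 * is used instead.
 */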
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
			    struct iser_mem_reg *mem_reg,
			    struct iser_data_buf *mem,
			    enum iser_reg_indicator ind,
			    struct ib_sge *sge)
{
	struct fast_reg_descriptor *desc = mem_reg->mem_h;
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_device *ibdev = device->ib_device;
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *frpl;
	struct ib_send_wr fastreg_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	int ret, offset, size, plen;

	/* if there is a single dma entry, the dma mr suffices */
	if (mem->dma_nents == 1) {
		struct scatterlist *sg = mem->sg;

		sge->lkey = device->mr->lkey;
		sge->addr   = ib_sg_dma_address(ibdev, &sg[0]);
		sge->length  = ib_sg_dma_len(ibdev, &sg[0]);

		iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
			 sge->lkey, sge->addr, sge->length);
		return 0;
	}

	if (ind == ISER_DATA_KEY_VALID) {
		mr = desc->data_mr;
		frpl = desc->data_frpl;
	} else {
		mr = desc->pi_ctx->prot_mr;
		frpl = desc->pi_ctx->prot_frpl;
	}

	plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
				   &offset, &size);
	if (plen * SIZE_4K < size) {
		iser_err("fast reg page_list too short to hold this SG\n");
		return -EINVAL;
	}
	if (!(desc->reg_indicators & ind)) {
		iser_inv_rkey(&inv_wr, mr);
		wr = &inv_wr;
	}

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
	fastreg_wr.wr.fast_reg.page_list = frpl;
	fastreg_wr.wr.fast_reg.page_list_len = plen;
	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
	fastreg_wr.wr.fast_reg.length = size;
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
					       IB_ACCESS_REMOTE_WRITE |
					       IB_ACCESS_REMOTE_READ);

	if (!wr)
		wr = &fastreg_wr;
	else
		wr->next = &fastreg_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("fast registration failed, ret:%d\n", ret);
		return ret;
	}
	desc->reg_indicators &= ~ind;
	sge->lkey = mr->lkey;
	sge->addr = frpl->page_list[0] + offset;
	sge->length = size;

	return ret;
}

/**
 * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,
 * using a Fast Registration WR (if possible), obtaining the rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
			      enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_device *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
	struct fast_reg_descriptor *desc = NULL;
	struct ib_sge data_sge;
	int err, aligned_len;

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(iser_task, mem,
					 cmd_dir, aligned_len);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
	}

	if (mem->dma_nents != 1 ||
	    scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		desc = iser_reg_desc_get(ib_conn);
		mem_reg->mem_h = desc;
	}
	err = iser_fast_reg_mr(iser_task, mem_reg, mem,
			       ISER_DATA_KEY_VALID, &data_sge);
	if (err)
		goto err_reg;

	if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		struct ib_sge prot_sge, sig_sge;

		memset(&prot_sge, 0, sizeof(prot_sge));
		if (scsi_prot_sg_count(iser_task->sc)) {
			mem = &iser_task->prot[cmd_dir];
			aligned_len = iser_data_buf_aligned_len(mem, ibdev);
			if (aligned_len != mem->dma_nents) {
				err = fall_to_bounce_buf(iser_task, mem,
							 cmd_dir, aligned_len);
				if (err) {
					iser_err("failed to allocate bounce buffer\n");
					return err;
				}
			}

			err = iser_fast_reg_mr(iser_task, mem_reg, mem,
					       ISER_PROT_KEY_VALID, &prot_sge);
			if (err)
				goto err_reg;
		}

		err = iser_reg_sig_mr(iser_task, desc, &data_sge,
				      &prot_sge, &sig_sge);
		if (err) {
			iser_err("Failed to register signature mr\n");
			return err;
		}
		desc->reg_indicators |= ISER_FASTREG_PROTECTED;

		mem_reg->lkey = sig_sge.lkey;
		mem_reg->rkey = desc->pi_ctx->sig_mr->rkey;
		mem_reg->va = sig_sge.addr;
		mem_reg->len = sig_sge.length;
	} else {
		if (desc)
			mem_reg->rkey = desc->data_mr->rkey;
		else
			mem_reg->rkey = device->mr->rkey;
		mem_reg->lkey = data_sge.lkey;
		mem_reg->va = data_sge.addr;
		mem_reg->len = data_sge.length;
	}
	return 0;
err_reg:
819 820
	if (desc)
		iser_reg_desc_put(ib_conn, desc);
	return err;
}