/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>

#include "iscsi_iser.h"
static
int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
		      struct iser_data_buf *mem,
		      struct iser_reg_resources *rsc,
		      struct iser_mem_reg *mem_reg);
static
int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
		     struct iser_data_buf *mem,
		     struct iser_reg_resources *rsc,
		     struct iser_mem_reg *mem_reg);

static struct iser_reg_ops fastreg_ops = {
	.alloc_reg_res	= iser_alloc_fastreg_pool,
	.free_reg_res	= iser_free_fastreg_pool,
	.reg_mem	= iser_fast_reg_mr,
	.unreg_mem	= iser_unreg_mem_fastreg,
	.reg_desc_get	= iser_reg_desc_get_fr,
	.reg_desc_put	= iser_reg_desc_put_fr,
};

static struct iser_reg_ops fmr_ops = {
	.alloc_reg_res	= iser_alloc_fmr_pool,
	.free_reg_res	= iser_free_fmr_pool,
	.reg_mem	= iser_fast_reg_fmr,
	.unreg_mem	= iser_unreg_mem_fmr,
	.reg_desc_get	= iser_reg_desc_get_fmr,
	.reg_desc_put	= iser_reg_desc_put_fmr,
};

int iser_assign_reg_ops(struct iser_device *device)
{
	struct ib_device_attr *dev_attr = &device->dev_attr;

	/* Assign function handles  - based on FMR support */
	if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
	    device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
		iser_info("FMR supported, using FMR for registration\n");
		device->reg_ops = &fmr_ops;
	} else
	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
		iser_info("FastReg supported, using FastReg for registration\n");
		device->reg_ops = &fastreg_ops;
	} else {
		iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
		return -1;
	}

	return 0;
}

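/* Free the bounce buffer pages and restore the original scatterlist */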
static void
iser_free_bounce_sg(struct iser_data_buf *data)
{
	struct scatterlist *sg;
	int count;

	for_each_sg(data->sg, sg, data->size, count)
		__free_page(sg_page(sg));

	kfree(data->sg);

	data->sg = data->orig_sg;
	data->size = data->orig_size;
	data->orig_sg = NULL;
	data->orig_size = 0;
}

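/*
 * Allocate a page-aligned bounce scatterlist large enough to hold
 * data->data_len bytes and swap it in place of the original scatterlist,
 * which is saved in orig_sg/orig_size for later restoration.
 */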
static int
iser_alloc_bounce_sg(struct iser_data_buf *data)
{
	struct scatterlist *sg;
	struct page *page;
	unsigned long length = data->data_len;
	int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);

	sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
	if (!sg)
		goto err;

	sg_init_table(sg, nents);
	while (length) {
		u32 page_len = min_t(u32, length, PAGE_SIZE);

		page = alloc_page(GFP_ATOMIC);
		if (!page)
			goto err;

		sg_set_page(&sg[i], page, page_len, 0);
		length -= page_len;
		i++;
	}

	data->orig_sg = data->sg;
	data->orig_size = data->size;
	data->sg = sg;
	data->size = nents;

	return 0;

err:
	for (; i > 0; i--)
		__free_page(sg_page(&sg[i - 1]));
	kfree(sg);

	return -ENOMEM;
}

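/*
 * Copy the payload between the original scatterlist and the bounce
 * scatterlist; to_buffer selects the direction (true: original to bounce).
 */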
static void
iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
{
	struct scatterlist *osg, *bsg = data->sg;
	void *oaddr, *baddr;
	unsigned int left = data->data_len;
	unsigned int bsg_off = 0;
	int i;

	for_each_sg(data->orig_sg, osg, data->orig_size, i) {
		unsigned int copy_len, osg_off = 0;

		oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
		copy_len = min(left, osg->length);
		while (copy_len) {
			unsigned int len = min(copy_len, bsg->length - bsg_off);

			baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
			if (to_buffer)
				memcpy(baddr + bsg_off, oaddr + osg_off, len);
			else
				memcpy(oaddr + osg_off, baddr + bsg_off, len);

			kunmap_atomic(baddr - bsg->offset);
			osg_off += len;
			bsg_off += len;
			copy_len -= len;

			if (bsg_off >= bsg->length) {
				bsg = sg_next(bsg);
				bsg_off = 0;
			}
		}
		kunmap_atomic(oaddr - osg->offset);
		left -= osg_off;
	}
}

static inline void
iser_copy_from_bounce(struct iser_data_buf *data)
{
	iser_copy_bounce(data, false);
}

static inline void
iser_copy_to_bounce(struct iser_data_buf *data)
{
	iser_copy_bounce(data, true);
}

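/* Pop a fastreg descriptor from the connection's registration pool */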
struct iser_fr_desc *
iser_reg_desc_get_fr(struct ib_conn *ib_conn)
{
	struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
	struct iser_fr_desc *desc;
	unsigned long flags;

	spin_lock_irqsave(&fr_pool->lock, flags);
	desc = list_first_entry(&fr_pool->list,
				struct iser_fr_desc, list);
	list_del(&desc->list);
	spin_unlock_irqrestore(&fr_pool->lock, flags);

	return desc;
}

void
iser_reg_desc_put_fr(struct ib_conn *ib_conn,
		     struct iser_fr_desc *desc)
{
	struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
	unsigned long flags;

	spin_lock_irqsave(&fr_pool->lock, flags);
	list_add(&desc->list, &fr_pool->list);
	spin_unlock_irqrestore(&fr_pool->lock, flags);
}

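/*
 * FMR descriptors are never removed from the pool list: get simply peeks
 * at the first entry and put has nothing to release.
 */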
struct iser_fr_desc *
iser_reg_desc_get_fmr(struct ib_conn *ib_conn)
{
	struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;

	return list_first_entry(&fr_pool->list,
				struct iser_fr_desc, list);
}

void
iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
		      struct iser_fr_desc *desc)
{
}

/**
 * iser_start_rdma_unaligned_sg - allocate a bounce buffer for a scatterlist
 * that violates the RDMA alignment requirements, copy the payload into it
 * for writes, and DMA map the bounce scatterlist in its place.
 */
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
					struct iser_data_buf *data,
					enum iser_data_dir cmd_dir)
{
	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
	int rc;

	rc = iser_alloc_bounce_sg(data);
	if (rc) {
		iser_err("Failed to allocate bounce for data len %lu\n",
			 data->data_len);
		return rc;
	}

	if (cmd_dir == ISER_DIR_OUT)
		iser_copy_to_bounce(data);

	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
					(cmd_dir == ISER_DIR_OUT) ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
	if (!data->dma_nents) {
		iser_err("Got dma_nents %d, something went wrong...\n",
			 data->dma_nents);
		rc = -ENOMEM;
		goto err;
	}

	return 0;
err:
	iser_free_bounce_sg(data);
	return rc;
}

/**
 * iser_finalize_rdma_unaligned_sg - DMA unmap the bounce scatterlist, copy
 * the payload back to the original scatterlist for reads, and release the
 * bounce buffer.
 */

void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
				     struct iser_data_buf *data,
				     enum iser_data_dir cmd_dir)
{
	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;

	ib_dma_unmap_sg(dev, data->sg, data->size,
			(cmd_dir == ISER_DIR_OUT) ?
			DMA_TO_DEVICE : DMA_FROM_DEVICE);

	if (cmd_dir == ISER_DIR_IN)
		iser_copy_from_bounce(data);

	iser_free_bounce_sg(data);
}

#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)

/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of the resulting physical address array (may be less
 * than the original due to possible compaction).
 *
 * We build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages. The code also supports the weird case
 * where a few fragments of the same page are present in the SG as
 * consecutive elements. It also handles a single-entry SG.
 */

static int iser_sg_to_page_vec(struct iser_data_buf *data,
			       struct ib_device *ibdev, u64 *pages,
			       int *offset, int *data_size)
{
	struct scatterlist *sg, *sgl = data->sg;
	u64 start_addr, end_addr, page, chunk_start = 0;
	unsigned long total_sz = 0;
	unsigned int dma_len;
	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;

	/* compute the offset of first element */
	*offset = (u64) sgl[0].offset & ~MASK_4K;

	new_chunk = 1;
	cur_page  = 0;
	for_each_sg(sgl, sg, data->dma_nents, i) {
		start_addr = ib_sg_dma_address(ibdev, sg);
		if (new_chunk)
			chunk_start = start_addr;
		dma_len = ib_sg_dma_len(ibdev, sg);
		end_addr = start_addr + dma_len;
		total_sz += dma_len;

		/* collect page fragments until aligned or end of SG list */
		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
			new_chunk = 0;
			continue;
		}
		new_chunk = 1;

		/* address of the first page in the contiguous chunk;
		   masking relevant for the very first SG entry,
		   which might be unaligned */
		page = chunk_start & MASK_4K;
		do {
			pages[cur_page++] = page;
			page += SIZE_4K;
		} while (page < end_addr);
	}

	*data_size = total_sz;
	iser_dbg("page_vec->data_size:%d cur_page %d\n",
		 *data_size, cur_page);
	return cur_page;
}


/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers that is correctly aligned for RDMA,
 * and returns the number of entries which are aligned correctly. Supports
 * the case where consecutive SG elements are actually fragments of the same
 * physical page.
 */
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
				     struct ib_device *ibdev,
				     unsigned sg_tablesize)
{
	struct scatterlist *sg, *sgl, *next_sg = NULL;
	u64 start_addr, end_addr;
	int i, ret_len, start_check = 0;

	if (data->dma_nents == 1)
		return 1;

	sgl = data->sg;
	start_addr  = ib_sg_dma_address(ibdev, sgl);

	if (unlikely(sgl[0].offset &&
		     data->data_len >= sg_tablesize * PAGE_SIZE)) {
		iser_dbg("can't register length %lx with offset %x "
			 "fall to bounce buffer\n", data->data_len,
			 sgl[0].offset);
		return 0;
	}

	for_each_sg(sgl, sg, data->dma_nents, i) {
		if (start_check && !IS_4K_ALIGNED(start_addr))
			break;

		next_sg = sg_next(sg);
		if (!next_sg)
			break;

		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
		start_addr  = ib_sg_dma_address(ibdev, next_sg);

		if (end_addr == start_addr) {
			start_check = 0;
			continue;
		} else
			start_check = 1;

		if (!IS_4K_ALIGNED(end_addr))
			break;
	}
	ret_len = (next_sg) ? i : i+1;

	if (unlikely(ret_len != data->dma_nents))
		iser_warn("rdma alignment violation (%d/%d aligned)\n",
			  ret_len, data->dma_nents);

	return ret_len;
}

static void iser_data_buf_dump(struct iser_data_buf *data,
			       struct ib_device *ibdev)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(data->sg, sg, data->dma_nents, i)
		iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
			 "off:0x%x sz:0x%x dma_len:0x%x\n",
			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
			 sg_page(sg), sg->offset,
			 sg->length, ib_sg_dma_len(ibdev, sg));
}

static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
	int i;

	iser_err("page vec length %d data size %d\n",
		 page_vec->length, page_vec->data_size);
	for (i = 0; i < page_vec->length; i++)
		iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
}

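/* DMA map the task's scatterlist and mark the task as mapped for iser_dir */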
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *data,
			    enum iser_data_dir iser_dir,
			    enum dma_data_direction dma_dir)
{
	struct ib_device *dev;

	iser_task->dir[iser_dir] = 1;
	dev = iser_task->iser_conn->ib_conn.device->ib_device;

	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
	if (data->dma_nents == 0) {
		iser_err("dma_map_sg failed!!!\n");
		return -EINVAL;
	}
	return 0;
}

void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *data,
			      enum dma_data_direction dir)
{
	struct ib_device *dev;

	dev = iser_task->iser_conn->ib_conn.device->ib_device;
	ib_dma_unmap_sg(dev, data->sg, data->size, dir);
}

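/*
 * A single-entry scatterlist needs no registration - point the SGE at the
 * device's global DMA memory region instead.
 */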
static int
iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
	     struct iser_mem_reg *reg)
{
	struct scatterlist *sg = mem->sg;

	reg->sge.lkey = device->mr->lkey;
	reg->rkey = device->mr->rkey;
	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);

	iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
		 " length=0x%x\n", reg->sge.lkey, reg->rkey,
		 reg->sge.addr, reg->sge.length);

	return 0;
}

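/*
 * The scatterlist is not suitably aligned for RDMA: unmap it, copy the
 * payload into a page-aligned bounce buffer and DMA map that instead.
 */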
static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *mem,
			      enum iser_data_dir cmd_dir)
{
	struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
	struct iser_device *device = iser_task->iser_conn->ib_conn.device;

	iscsi_conn->fmr_unalign_cnt++;

	if (iser_debug_level > 0)
		iser_data_buf_dump(mem, device->ib_device);

	/* unmap the command data before accessing it */
	iser_dma_unmap_task_data(iser_task, mem,
				 (cmd_dir == ISER_DIR_OUT) ?
				 DMA_TO_DEVICE : DMA_FROM_DEVICE);

	/* allocate a bounce buffer and, if we are writing, copy the
	 * unaligned scatterlist into it, then DMA map the copy
	 */
	if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
		return -ENOMEM;

	return 0;
}

/**
 * iser_fast_reg_fmr - Registers physical memory through the FMR pool
 *
 * returns: 0 on success, errno code on failure
 */
static
int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
		      struct iser_data_buf *mem,
		      struct iser_reg_resources *rsc,
		      struct iser_mem_reg *reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct iser_page_vec *page_vec = rsc->page_vec;
	struct ib_fmr_pool *fmr_pool = rsc->fmr_pool;
	struct ib_pool_fmr *fmr;
	int ret, plen;

	plen = iser_sg_to_page_vec(mem, device->ib_device,
				   page_vec->pages,
				   &page_vec->offset,
				   &page_vec->data_size);
	page_vec->length = plen;
	if (plen * SIZE_4K < page_vec->data_size) {
		iser_err("page vec too short to hold this SG\n");
		iser_data_buf_dump(mem, device->ib_device);
		iser_dump_page_vec(page_vec);
		return -EINVAL;
	}

	fmr  = ib_fmr_pool_map_phys(fmr_pool,
				    page_vec->pages,
				    page_vec->length,
				    page_vec->pages[0]);
	if (IS_ERR(fmr)) {
		ret = PTR_ERR(fmr);
		iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
		return ret;
	}

	reg->sge.lkey = fmr->fmr->lkey;
	reg->rkey = fmr->fmr->rkey;
	reg->sge.addr = page_vec->pages[0] + page_vec->offset;
	reg->sge.length = page_vec->data_size;
	reg->mem_h = fmr;

	return 0;
}

/**
 * Unregister memory that was previously registered using FMR.
 * If the memory was not registered via FMR, this does nothing.
 */
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
			enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
	int ret;

	if (!reg->mem_h)
		return;

	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

	ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
	if (ret)
		iser_err("ib_fmr_pool_unmap failed %d\n", ret);

	reg->mem_h = NULL;
}

void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
			    enum iser_data_dir cmd_dir)
{
	struct iser_device *device = iser_task->iser_conn->ib_conn.device;
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];

	if (!reg->mem_h)
		return;

	device->reg_ops->reg_desc_put(&iser_task->iser_conn->ib_conn,
				     reg->mem_h);
	reg->mem_h = NULL;
}

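/* Fill a T10-DIF signature domain from the scsi command's protection info */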
static void
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
		    struct ib_sig_domain *domain)
{
	domain->sig_type = IB_SIG_TYPE_T10_DIF;
	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
	/*
	 * At the moment we hard code those, but in the future
	 * we will take them from sc.
	 */
	domain->sig.dif.apptag_check_mask = 0xffff;
	domain->sig.dif.app_escape = true;
	domain->sig.dif.ref_escape = true;
	if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
		domain->sig.dif.ref_remap = true;
};

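/*
 * Translate the scsi command's protection operation into signature
 * attributes for the wire and memory domains.
 */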
static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
	switch (scsi_get_prot_op(sc)) {
	case SCSI_PROT_WRITE_INSERT:
	case SCSI_PROT_READ_STRIP:
		sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_INSERT:
	case SCSI_PROT_WRITE_STRIP:
		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_PASS:
	case SCSI_PROT_WRITE_PASS:
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	default:
		iser_err("Unsupported PI operation %d\n",
			 scsi_get_prot_op(sc));
		return -EINVAL;
	}

	return 0;
}

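/* Build the signature check mask from the scsi command's protection flags */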
static inline void
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
	*mask = 0;
	if (sc->prot_flags & SCSI_PROT_REF_CHECK)
		*mask |= ISER_CHECK_REFTAG;
	if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
		*mask |= ISER_CHECK_GUARD;
}

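/* Prepare a local invalidate work request and advance the MR's rkey */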
static void
iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
	u32 rkey;

	memset(inv_wr, 0, sizeof(*inv_wr));
	inv_wr->opcode = IB_WR_LOCAL_INV;
	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
	inv_wr->ex.invalidate_rkey = mr->rkey;

	rkey = ib_inc_rkey(mr->rkey);
	ib_update_fast_reg_key(mr, rkey);
}

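/*
 * Post a signature handover work request binding the data (and, if present,
 * protection) registrations to the signature MR, prepending a local
 * invalidate when the MR was used for a previous registration.
 */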
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
		struct iser_pi_context *pi_ctx,
		struct iser_mem_reg *data_reg,
		struct iser_mem_reg *prot_reg,
		struct iser_mem_reg *sig_reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct ib_send_wr sig_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	struct ib_sig_attrs sig_attrs;
	int ret;

	memset(&sig_attrs, 0, sizeof(sig_attrs));
	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
	if (ret)
		goto err;

	iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);

	if (!pi_ctx->sig_mr_valid) {
		iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
		wr = &inv_wr;
	}

	memset(&sig_wr, 0, sizeof(sig_wr));
	sig_wr.opcode = IB_WR_REG_SIG_MR;
	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
	sig_wr.sg_list = &data_reg->sge;
	sig_wr.num_sge = 1;
	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
	if (scsi_prot_sg_count(iser_task->sc))
		sig_wr.wr.sig_handover.prot = &prot_reg->sge;
	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
					      IB_ACCESS_REMOTE_READ |
					      IB_ACCESS_REMOTE_WRITE;

	if (!wr)
		wr = &sig_wr;
	else
		wr->next = &sig_wr;

S
716 717 718 719
	if (ret) {
		iser_err("reg_sig_mr failed, ret:%d\n", ret);
		goto err;
	}
	pi_ctx->sig_mr_valid = 0;

	sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
	sig_reg->rkey = pi_ctx->sig_mr->rkey;
	sig_reg->sge.addr = 0;
	sig_reg->sge.length = scsi_transfer_length(iser_task->sc);

	iser_dbg("sig_sge: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
		 sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
		 sig_reg->sge.length);
err:
	return ret;
}

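/*
 * Register the scatterlist with a fast registration work request,
 * prepending a local invalidate when the MR was used for a previous
 * registration.
 */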
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *mem,
			    struct iser_reg_resources *rsc,
			    struct iser_mem_reg *reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_mr *mr = rsc->mr;
	struct ib_fast_reg_page_list *frpl = rsc->frpl;
	struct ib_send_wr fastreg_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	int ret, offset, size, plen;

	plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
				   &offset, &size);
	if (plen * SIZE_4K < size) {
		iser_err("fast reg page_list too short to hold this SG\n");
		return -EINVAL;
	}

	if (!rsc->mr_valid) {
		iser_inv_rkey(&inv_wr, mr);
		wr = &inv_wr;
	}

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
	fastreg_wr.wr.fast_reg.page_list = frpl;
	fastreg_wr.wr.fast_reg.page_list_len = plen;
	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
	fastreg_wr.wr.fast_reg.length = size;
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
					       IB_ACCESS_REMOTE_WRITE |
					       IB_ACCESS_REMOTE_READ);

	if (!wr)
		wr = &fastreg_wr;
	else
		wr->next = &fastreg_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("fast registration failed, ret:%d\n", ret);
		return ret;
	}
	rsc->mr_valid = 0;

	reg->sge.lkey = mr->lkey;
	reg->rkey = mr->rkey;
	reg->sge.addr = frpl->page_list[0] + offset;
	reg->sge.length = size;

	return ret;
}

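/*
 * Check that the buffer satisfies the RDMA alignment constraints and fall
 * back to a bounce buffer if it does not.
 */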
static int
iser_handle_unaligned_buf(struct iscsi_iser_task *task,
			  struct iser_data_buf *mem,
			  enum iser_data_dir dir)
{
	struct iser_conn *iser_conn = task->iser_conn;
	struct iser_device *device = iser_conn->ib_conn.device;
	int err, aligned_len;

	aligned_len = iser_data_buf_aligned_len(mem, device->ib_device,
						iser_conn->scsi_sg_tablesize);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(task, mem, dir);
		if (err)
			return err;
	}

	return 0;
}

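/*
 * Register the protection scatterlist: a single-entry list uses the global
 * DMA MR, anything else goes through the device's registration ops with the
 * descriptor's protection context resources.
 */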
static int
iser_reg_prot_sg(struct iscsi_iser_task *task,
		 struct iser_data_buf *mem,
		 struct iser_fr_desc *desc,
		 struct iser_mem_reg *reg)
{
	struct iser_device *device = task->iser_conn->ib_conn.device;

	if (mem->dma_nents == 1)
		return iser_reg_dma(device, mem, reg);

	return device->reg_ops->reg_mem(task, mem, &desc->pi_ctx->rsc, reg);
}

static int
iser_reg_data_sg(struct iscsi_iser_task *task,
		 struct iser_data_buf *mem,
		 struct iser_fr_desc *desc,
		 struct iser_mem_reg *reg)
{
	struct iser_device *device = task->iser_conn->ib_conn.device;

	if (mem->dma_nents == 1)
		return iser_reg_dma(device, mem, reg);

	return device->reg_ops->reg_mem(task, mem, &desc->rsc, reg);
}

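/*
 * Set up the RDMA registration for a task in the given direction: handle
 * unaligned buffers, register the data (and protection) scatterlists and,
 * for protected commands, bind them to a signature MR.
 */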
int iser_reg_rdma_mem(struct iscsi_iser_task *task,
		      enum iser_data_dir dir)
{
	struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct iser_data_buf *mem = &task->data[dir];
	struct iser_mem_reg *reg = &task->rdma_reg[dir];
	struct iser_fr_desc *desc = NULL;
	int err;

	err = iser_handle_unaligned_buf(task, mem, dir);
	if (unlikely(err))
		return err;

	if (mem->dma_nents != 1 ||
	    scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) {
		desc = device->reg_ops->reg_desc_get(ib_conn);
		reg->mem_h = desc;
	}

	err = iser_reg_data_sg(task, mem, desc, reg);
	if (unlikely(err))
		goto err_reg;

	if (scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) {
		struct iser_mem_reg prot_reg;

		memset(&prot_reg, 0, sizeof(prot_reg));
		if (scsi_prot_sg_count(task->sc)) {
			mem = &task->prot[dir];
			err = iser_handle_unaligned_buf(task, mem, dir);
			if (unlikely(err))
				goto err_reg;

			err = iser_reg_prot_sg(task, mem, desc, &prot_reg);
			if (unlikely(err))
				goto err_reg;
		}

		err = iser_reg_sig_mr(task, desc->pi_ctx, reg,
				      &prot_reg, reg);
		if (unlikely(err))
			goto err_reg;

		desc->pi_ctx->sig_protected = 1;
	}

	return 0;

err_reg:
	if (desc)
		device->reg_ops->reg_desc_put(ib_conn, desc);

	return err;
}

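/* Release the task's RDMA registration via the device's registration ops */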
void iser_unreg_rdma_mem(struct iscsi_iser_task *task,
			 enum iser_data_dir dir)
{
	struct iser_device *device = task->iser_conn->ib_conn.device;

	device->reg_ops->unreg_mem(task, dir);
}