/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>

#include "iscsi_iser.h"
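/*
 * Bounce-buffer helpers (overview comment added for clarity): when a
 * task's scatterlist is not aligned for RDMA, iser_start_rdma_unaligned_sg()
 * allocates a page-sized bounce SG (iser_alloc_bounce_sg), copies the
 * payload into it for writes (iser_copy_to_bounce) and DMA-maps it in
 * place of the original SG.  After the transfer,
 * iser_finalize_rdma_unaligned_sg() unmaps the bounce SG, copies the
 * received data back for reads (iser_copy_from_bounce) and restores the
 * original SG (iser_free_bounce_sg).
 */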

static void
iser_free_bounce_sg(struct iser_data_buf *data)
{
	struct scatterlist *sg;
	int count;

	for_each_sg(data->sg, sg, data->size, count)
		__free_page(sg_page(sg));

	kfree(data->sg);

	data->sg = data->orig_sg;
	data->size = data->orig_size;
	data->orig_sg = NULL;
	data->orig_size = 0;
}

static int
iser_alloc_bounce_sg(struct iser_data_buf *data)
{
	struct scatterlist *sg;
	struct page *page;
	unsigned long length = data->data_len;
	int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);

	sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
	if (!sg)
		goto err;

	sg_init_table(sg, nents);
	while (length) {
		u32 page_len = min_t(u32, length, PAGE_SIZE);

		page = alloc_page(GFP_ATOMIC);
		if (!page)
			goto err;

		sg_set_page(&sg[i], page, page_len, 0);
		length -= page_len;
		i++;
	}

	data->orig_sg = data->sg;
	data->orig_size = data->size;
	data->sg = sg;
	data->size = nents;

	return 0;

err:
	for (; i > 0; i--)
		__free_page(sg_page(&sg[i - 1]));
	kfree(sg);

	return -ENOMEM;
}

static void
iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
{
	struct scatterlist *osg, *bsg = data->sg;
	void *oaddr, *baddr;
	unsigned int left = data->data_len;
	unsigned int bsg_off = 0;
	int i;

	for_each_sg(data->orig_sg, osg, data->orig_size, i) {
		unsigned int copy_len, osg_off = 0;

		oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
		copy_len = min(left, osg->length);
		while (copy_len) {
			unsigned int len = min(copy_len, bsg->length - bsg_off);

			baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
			if (to_buffer)
				memcpy(baddr + bsg_off, oaddr + osg_off, len);
			else
				memcpy(oaddr + osg_off, baddr + bsg_off, len);

			kunmap_atomic(baddr - bsg->offset);
			osg_off += len;
			bsg_off += len;
			copy_len -= len;

			if (bsg_off >= bsg->length) {
				bsg = sg_next(bsg);
				bsg_off = 0;
			}
		}
		kunmap_atomic(oaddr - osg->offset);
		left -= osg_off;
	}
}

static inline void
iser_copy_from_bounce(struct iser_data_buf *data)
{
	iser_copy_bounce(data, false);
}

static inline void
iser_copy_to_bounce(struct iser_data_buf *data)
{
	iser_copy_bounce(data, true);
}

struct fast_reg_descriptor *
iser_reg_desc_get(struct ib_conn *ib_conn)
{
	struct fast_reg_descriptor *desc;
	unsigned long flags;

	spin_lock_irqsave(&ib_conn->lock, flags);
	desc = list_first_entry(&ib_conn->fastreg.pool,
				struct fast_reg_descriptor, list);
	list_del(&desc->list);
	spin_unlock_irqrestore(&ib_conn->lock, flags);

	return desc;
}

void
iser_reg_desc_put(struct ib_conn *ib_conn,
		  struct fast_reg_descriptor *desc)
{
	unsigned long flags;

	spin_lock_irqsave(&ib_conn->lock, flags);
	list_add(&desc->list, &ib_conn->fastreg.pool);
	spin_unlock_irqrestore(&ib_conn->lock, flags);
}

/**
 * iser_start_rdma_unaligned_sg - Allocate a bounce buffer for an
 * RDMA-unaligned scatterlist, copy the data into it for writes, and
 * DMA-map the bounce buffer in place of the original SG.
 */
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
					struct iser_data_buf *data,
					enum iser_data_dir cmd_dir)
{
	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
	int rc;

	rc = iser_alloc_bounce_sg(data);
	if (rc) {
		iser_err("Failed to allocate bounce for data len %lu\n",
			 data->data_len);
		return rc;
	}

	if (cmd_dir == ISER_DIR_OUT)
		iser_copy_to_bounce(data);

	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
					(cmd_dir == ISER_DIR_OUT) ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
	if (!data->dma_nents) {
		iser_err("Got dma_nents %d, something went wrong...\n",
			 data->dma_nents);
		rc = -ENOMEM;
		goto err;
	}

	return 0;
err:
	iser_free_bounce_sg(data);
	return rc;
}

/**
 * iser_finalize_rdma_unaligned_sg - DMA-unmap the bounce buffer, copy the
 * received data back to the original scatterlist for reads, and free the
 * bounce buffer.
 */
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
				     struct iser_data_buf *data,
				     enum iser_data_dir cmd_dir)
{
	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;

	ib_dma_unmap_sg(dev, data->sg, data->size,
			(cmd_dir == ISER_DIR_OUT) ?
			DMA_TO_DEVICE : DMA_FROM_DEVICE);

	if (cmd_dir == ISER_DIR_IN)
		iser_copy_from_bounce(data);

	iser_free_bounce_sg(data);
}

#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)
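/*
 * e.g. (assuming MASK_4K == ~(SIZE_4K - 1), as defined in iscsi_iser.h):
 * 0x10000 is 4K-aligned, 0x10200 is not.
 */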

/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of the resulting physical address array (may be
 * less than the original due to possible compaction).
 *
 * We build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages. The code also supports the weird case
 * where a few fragments of the same page are present in the SG as
 * consecutive elements. A single-entry SG is handled as well.
 */
static int iser_sg_to_page_vec(struct iser_data_buf *data,
			       struct ib_device *ibdev, u64 *pages,
			       int *offset, int *data_size)
{
	struct scatterlist *sg, *sgl = data->sg;
	u64 start_addr, end_addr, page, chunk_start = 0;
	unsigned long total_sz = 0;
	unsigned int dma_len;
	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;

	/* compute the offset of first element */
	*offset = (u64) sgl[0].offset & ~MASK_4K;

	new_chunk = 1;
	cur_page  = 0;
	for_each_sg(sgl, sg, data->dma_nents, i) {
		start_addr = ib_sg_dma_address(ibdev, sg);
		if (new_chunk)
			chunk_start = start_addr;
		dma_len = ib_sg_dma_len(ibdev, sg);
		end_addr = start_addr + dma_len;
		total_sz += dma_len;

		/* collect page fragments until aligned or end of SG list */
		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
			new_chunk = 0;
			continue;
		}
		new_chunk = 1;

		/* address of the first page in the contiguous chunk;
		   masking relevant for the very first SG entry,
		   which might be unaligned */
		page = chunk_start & MASK_4K;
		do {
			pages[cur_page++] = page;
			page += SIZE_4K;
		} while (page < end_addr);
	}

	*data_size = total_sz;
	iser_dbg("page_vec->data_size:%d cur_page %d\n",
		 *data_size, cur_page);
	return cur_page;
}
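/*
 * Worked example (hypothetical addresses, added for illustration).
 * A DMA-mapped SG of three entries, where the first starts 0x200 bytes
 * into its page:
 *
 *	sg[0]: dma addr 0x10200, len 0x0e00  (ends 4K-aligned at 0x11000)
 *	sg[1]: dma addr 0x11000, len 0x1000
 *	sg[2]: dma addr 0x12000, len 0x0800  (last entry may end unaligned)
 *
 * yields pages[] = { 0x10000, 0x11000, 0x12000 }, *offset = 0x200 and
 * *data_size = 0x2600.
 */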


/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers that is correctly aligned for RDMA,
 * and returns the number of entries that are aligned correctly. Supports the
 * case where consecutive SG elements are actually fragments of the same
 * physical page.
 */
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
				      struct ib_device *ibdev)
{
	struct scatterlist *sg, *sgl, *next_sg = NULL;
	u64 start_addr, end_addr;
	int i, ret_len, start_check = 0;

	if (data->dma_nents == 1)
		return 1;

	sgl = data->sg;
	start_addr  = ib_sg_dma_address(ibdev, sgl);

	for_each_sg(sgl, sg, data->dma_nents, i) {
		if (start_check && !IS_4K_ALIGNED(start_addr))
			break;

		next_sg = sg_next(sg);
		if (!next_sg)
			break;

		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
		start_addr  = ib_sg_dma_address(ibdev, next_sg);

		if (end_addr == start_addr) {
			start_check = 0;
			continue;
		} else
			start_check = 1;

		if (!IS_4K_ALIGNED(end_addr))
			break;
	}
	ret_len = (next_sg) ? i : i+1;

	if (unlikely(ret_len != data->dma_nents))
		iser_warn("rdma alignment violation (%d/%d aligned)\n",
			  ret_len, data->dma_nents);

	return ret_len;
}
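/*
 * For illustration (added note): if every SG entry ends on a 4K boundary
 * and each following entry starts where the previous one ended or on a
 * fresh 4K-aligned address, the walk above reaches the last entry and
 * returns dma_nents, i.e. the buffer is usable for RDMA as-is.  If a
 * middle entry ends mid-page (or a later entry starts unaligned), the
 * walk stops there and the callers fall back to the bounce buffer path
 * via fall_to_bounce_buf().
 */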

static void iser_data_buf_dump(struct iser_data_buf *data,
			       struct ib_device *ibdev)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(data->sg, sg, data->dma_nents, i)
		iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
			 "off:0x%x sz:0x%x dma_len:0x%x\n",
			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
			 sg_page(sg), sg->offset,
			 sg->length, ib_sg_dma_len(ibdev, sg));
}

static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
	int i;

	iser_err("page vec length %d data size %d\n",
		 page_vec->length, page_vec->data_size);
	for (i = 0; i < page_vec->length; i++)
		iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
}

int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *data,
			    enum iser_data_dir iser_dir,
			    enum dma_data_direction dma_dir)
{
	struct ib_device *dev;

	iser_task->dir[iser_dir] = 1;
	dev = iser_task->iser_conn->ib_conn.device->ib_device;

	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
	if (data->dma_nents == 0) {
		iser_err("dma_map_sg failed!!!\n");
		return -EINVAL;
	}
	return 0;
}

void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *data,
			      enum dma_data_direction dir)
{
	struct ib_device *dev;

	dev = iser_task->iser_conn->ib_conn.device->ib_device;
	ib_dma_unmap_sg(dev, data->sg, data->size, dir);
}

static int
iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
	     struct iser_mem_reg *reg)
{
	struct scatterlist *sg = mem->sg;

	reg->sge.lkey = device->mr->lkey;
	reg->rkey = device->mr->rkey;
	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);

	iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
		 " length=0x%x\n", reg->sge.lkey, reg->rkey,
		 reg->sge.addr, reg->sge.length);

	return 0;
}

static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *mem,
			      enum iser_data_dir cmd_dir)
{
	struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
	struct iser_device *device = iser_task->iser_conn->ib_conn.device;

	iscsi_conn->fmr_unalign_cnt++;

	if (iser_debug_level > 0)
		iser_data_buf_dump(mem, device->ib_device);

	/* unmap the command data before accessing it */
	iser_dma_unmap_task_data(iser_task, mem,
				 (cmd_dir == ISER_DIR_OUT) ?
				 DMA_TO_DEVICE : DMA_FROM_DEVICE);

	/*
	 * allocate the copy buffer; if we are writing, copy the
	 * unaligned scatterlist into it, then DMA-map the copy
	 */
	if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
		return -ENOMEM;

	return 0;
}

/**
 * iser_reg_page_vec - Register physical memory
 *
 * returns: 0 on success, errno code on failure
 */
static
int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
		      struct iser_data_buf *mem,
		      struct iser_page_vec *page_vec,
		      struct iser_mem_reg *mem_reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_pool_fmr *fmr;
	int ret, plen;

	plen = iser_sg_to_page_vec(mem, device->ib_device,
				   page_vec->pages,
				   &page_vec->offset,
				   &page_vec->data_size);
	page_vec->length = plen;
	if (plen * SIZE_4K < page_vec->data_size) {
		iser_err("page vec too short to hold this SG\n");
		iser_data_buf_dump(mem, device->ib_device);
		iser_dump_page_vec(page_vec);
		return -EINVAL;
	}

	fmr  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
				    page_vec->pages,
				    page_vec->length,
				    page_vec->pages[0]);
	if (IS_ERR(fmr)) {
		ret = PTR_ERR(fmr);
		iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
		return ret;
	}

	mem_reg->sge.lkey = fmr->fmr->lkey;
	mem_reg->rkey = fmr->fmr->rkey;
	mem_reg->sge.addr = page_vec->pages[0] + page_vec->offset;
	mem_reg->sge.length = page_vec->data_size;
	mem_reg->mem_h = fmr;

	return 0;
}

/**
 * Unregister memory (previously registered using FMR).
 * If the memory was not registered with FMR, does nothing.
 */
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
			enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
	int ret;

	if (!reg->mem_h)
		return;

	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

	ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
	if (ret)
		iser_err("ib_fmr_pool_unmap failed %d\n", ret);

	reg->mem_h = NULL;
}

void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
			    enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];

	if (!reg->mem_h)
		return;

	iser_reg_desc_put(&iser_task->iser_conn->ib_conn,
			  reg->mem_h);
	reg->mem_h = NULL;
}

/**
 * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
 * using FMR (if possible) obtaining rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
			  enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device   *device = ib_conn->device;
	struct ib_device     *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_mem_reg *mem_reg;
	int aligned_len;
	int err;
	int i;

	mem_reg = &iser_task->rdma_reg[cmd_dir];

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(iser_task, mem, cmd_dir);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
	}

	/* if there is a single dma entry, FMR is not needed */
	if (mem->dma_nents == 1) {
		return iser_reg_dma(device, mem, mem_reg);
	} else { /* use FMR for multiple dma entries */
		err = iser_reg_page_vec(iser_task, mem, ib_conn->fmr.page_vec,
					mem_reg);
		if (err && err != -EAGAIN) {
			iser_data_buf_dump(mem, ibdev);
			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
				 mem->dma_nents,
				 ntoh24(iser_task->desc.iscsi_header.dlength));
			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
				 ib_conn->fmr.page_vec->data_size,
				 ib_conn->fmr.page_vec->length,
				 ib_conn->fmr.page_vec->offset);
			for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
				iser_err("page_vec[%d] = 0x%llx\n", i,
					 (unsigned long long)ib_conn->fmr.page_vec->pages[i]);
		}
		if (err)
			return err;
	}
	return 0;
}
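/*
 * Note (added summary): in the FMR path above, a single-entry SG skips
 * registration entirely via iser_reg_dma(), which reuses the device's
 * global DMA MR (device->mr); otherwise the page vector built by
 * iser_sg_to_page_vec() is mapped through ib_fmr_pool_map_phys() and
 * the resulting lkey/rkey/addr are returned in the task's iser_mem_reg.
 */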

static void
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
		    struct ib_sig_domain *domain)
{
	domain->sig_type = IB_SIG_TYPE_T10_DIF;
	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
	/*
	 * At the moment we hard code those, but in the future
	 * we will take them from sc.
	 */
	domain->sig.dif.apptag_check_mask = 0xffff;
	domain->sig.dif.app_escape = true;
	domain->sig.dif.ref_escape = true;
	if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
		domain->sig.dif.ref_remap = true;
}

static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
	switch (scsi_get_prot_op(sc)) {
	case SCSI_PROT_WRITE_INSERT:
	case SCSI_PROT_READ_STRIP:
		sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_INSERT:
	case SCSI_PROT_WRITE_STRIP:
		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_PASS:
	case SCSI_PROT_WRITE_PASS:
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	default:
		iser_err("Unsupported PI operation %d\n",
			 scsi_get_prot_op(sc));
		return -EINVAL;
	}

	return 0;
}
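/*
 * Summary of the mapping above (added for reference):
 *
 *	WRITE_INSERT / READ_STRIP:  PI exists only in the wire domain
 *	READ_INSERT / WRITE_STRIP:  PI exists only in the memory domain
 *	READ_PASS / WRITE_PASS:     PI is configured in both domains
 *
 * The wire domain always uses a CRC guard; the memory domain uses an IP
 * checksum guard when SCSI_PROT_IP_CHECKSUM is set and CRC otherwise.
 */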

static inline void
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
	*mask = 0;
	if (sc->prot_flags & SCSI_PROT_REF_CHECK)
		*mask |= ISER_CHECK_REFTAG;
	if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
		*mask |= ISER_CHECK_GUARD;
}

static void
iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
	u32 rkey;

	memset(inv_wr, 0, sizeof(*inv_wr));
	inv_wr->opcode = IB_WR_LOCAL_INV;
	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
	inv_wr->ex.invalidate_rkey = mr->rkey;

	rkey = ib_inc_rkey(mr->rkey);
	ib_update_fast_reg_key(mr, rkey);
}

static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
		struct iser_pi_context *pi_ctx,
		struct iser_mem_reg *data_reg,
		struct iser_mem_reg *prot_reg,
		struct iser_mem_reg *sig_reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct ib_send_wr sig_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	struct ib_sig_attrs sig_attrs;
	int ret;

	memset(&sig_attrs, 0, sizeof(sig_attrs));
	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
	if (ret)
		goto err;

	iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);

	if (!pi_ctx->sig_mr_valid) {
		iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
		wr = &inv_wr;
	}

	memset(&sig_wr, 0, sizeof(sig_wr));
	sig_wr.opcode = IB_WR_REG_SIG_MR;
	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
	sig_wr.sg_list = &data_reg->sge;
	sig_wr.num_sge = 1;
	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
	if (scsi_prot_sg_count(iser_task->sc))
		sig_wr.wr.sig_handover.prot = &prot_reg->sge;
	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
					      IB_ACCESS_REMOTE_READ |
					      IB_ACCESS_REMOTE_WRITE;

	if (!wr)
		wr = &sig_wr;
	else
		wr->next = &sig_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("reg_sig_mr failed, ret:%d\n", ret);
		goto err;
	}
	pi_ctx->sig_mr_valid = 0;

	sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
	sig_reg->rkey = pi_ctx->sig_mr->rkey;
	sig_reg->sge.addr = 0;
	sig_reg->sge.length = scsi_transfer_length(iser_task->sc);

	iser_dbg("sig_sge: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
		 sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
		 sig_reg->sge.length);
err:
	return ret;
}

static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *mem,
			    struct iser_reg_resources *rsc,
			    struct iser_mem_reg *reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *frpl;
	struct ib_send_wr fastreg_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	int ret, offset, size, plen;

	/* if there is a single dma entry, dma mr suffices */
	if (mem->dma_nents == 1)
		return iser_reg_dma(device, mem, reg);

	mr = rsc->mr;
	frpl = rsc->frpl;

	plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
				   &offset, &size);
	if (plen * SIZE_4K < size) {
		iser_err("fast reg page_list too short to hold this SG\n");
		return -EINVAL;
	}

	if (!rsc->mr_valid) {
		iser_inv_rkey(&inv_wr, mr);
		wr = &inv_wr;
	}

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
	fastreg_wr.wr.fast_reg.page_list = frpl;
	fastreg_wr.wr.fast_reg.page_list_len = plen;
	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
	fastreg_wr.wr.fast_reg.length = size;
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
					       IB_ACCESS_REMOTE_WRITE |
					       IB_ACCESS_REMOTE_READ);

	if (!wr)
		wr = &fastreg_wr;
	else
		wr->next = &fastreg_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("fast registration failed, ret:%d\n", ret);
		return ret;
	}
	rsc->mr_valid = 0;

	reg->sge.lkey = mr->lkey;
	reg->rkey = mr->rkey;
	reg->sge.addr = frpl->page_list[0] + offset;
	reg->sge.length = size;

	return ret;
}
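/*
 * Note (added summary): iser_fast_reg_mr() implements the fast
 * registration (FRWR) flow: when the MR was previously used, a local
 * invalidate WR refreshing the rkey is chained in front, then an
 * IB_WR_FAST_REG_MR WR is posted with the page list produced by
 * iser_sg_to_page_vec(), and the resulting lkey/rkey/addr/length are
 * handed back in *reg.
 */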

/**
 * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,
 * using Fast Registration WR (if possible) obtaining rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
			      enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_device *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
	struct fast_reg_descriptor *desc = NULL;
	int err, aligned_len;

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(iser_task, mem, cmd_dir);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
	}

	if (mem->dma_nents != 1 ||
	    scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		desc = iser_reg_desc_get(ib_conn);
		mem_reg->mem_h = desc;
	}

	err = iser_fast_reg_mr(iser_task, mem,
			       desc ? &desc->rsc : NULL, mem_reg);
	if (err)
		goto err_reg;

	if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		struct iser_mem_reg prot_reg;

		memset(&prot_reg, 0, sizeof(prot_reg));
		if (scsi_prot_sg_count(iser_task->sc)) {
			mem = &iser_task->prot[cmd_dir];
			aligned_len = iser_data_buf_aligned_len(mem, ibdev);
			if (aligned_len != mem->dma_nents) {
				err = fall_to_bounce_buf(iser_task, mem,
							 cmd_dir);
				if (err) {
					iser_err("failed to allocate bounce buffer\n");
					return err;
				}
			}

			err = iser_fast_reg_mr(iser_task, mem,
					       &desc->pi_ctx->rsc, &prot_reg);
			if (err)
				goto err_reg;
		}

		err = iser_reg_sig_mr(iser_task, desc->pi_ctx, mem_reg,
				      &prot_reg, mem_reg);
		if (err) {
			iser_err("Failed to register signature mr\n");
			return err;
		}
		desc->pi_ctx->sig_protected = 1;
	}

	return 0;
err_reg:
	if (desc)
		iser_reg_desc_put(ib_conn, desc);

	return err;
}