/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>

#include "iscsi_iser.h"

static struct iser_reg_ops fastreg_ops = {
	.alloc_reg_res	= iser_alloc_fastreg_pool,
	.free_reg_res	= iser_free_fastreg_pool,
	.reg_rdma_mem	= iser_reg_rdma_mem_fastreg,
	.unreg_rdma_mem	= iser_unreg_mem_fastreg,
};

static struct iser_reg_ops fmr_ops = {
	.alloc_reg_res	= iser_alloc_fmr_pool,
	.free_reg_res	= iser_free_fmr_pool,
	.reg_rdma_mem	= iser_reg_rdma_mem_fmr,
	.unreg_rdma_mem	= iser_unreg_mem_fmr,
};

int iser_assign_reg_ops(struct iser_device *device)
{
	struct ib_device_attr *dev_attr = &device->dev_attr;

	/* Assign function handles  - based on FMR support */
	if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
	    device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
		iser_info("FMR supported, using FMR for registration\n");
		device->reg_ops = &fmr_ops;
	} else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
		iser_info("FastReg supported, using FastReg for registration\n");
		device->reg_ops = &fastreg_ops;
	} else {
		iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
		return -1;
	}

	return 0;
}
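/*
 * The rest of the driver registers and unregisters memory through the
 * assigned vtable, e.g. device->reg_ops->reg_rdma_mem(iser_task, cmd_dir),
 * and so stays agnostic to whether FMR or FastReg is used underneath.
 */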

static void
iser_free_bounce_sg(struct iser_data_buf *data)
{
	struct scatterlist *sg;
	int count;

	for_each_sg(data->sg, sg, data->size, count)
		__free_page(sg_page(sg));

	kfree(data->sg);

	data->sg = data->orig_sg;
	data->size = data->orig_size;
	data->orig_sg = NULL;
	data->orig_size = 0;
}

static int
iser_alloc_bounce_sg(struct iser_data_buf *data)
{
	struct scatterlist *sg;
	struct page *page;
	unsigned long length = data->data_len;
	int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);

	sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
	if (!sg)
		goto err;

	sg_init_table(sg, nents);
	while (length) {
		u32 page_len = min_t(u32, length, PAGE_SIZE);

		page = alloc_page(GFP_ATOMIC);
		if (!page)
			goto err;

		sg_set_page(&sg[i], page, page_len, 0);
		length -= page_len;
		i++;
	}

	data->orig_sg = data->sg;
	data->orig_size = data->size;
	data->sg = sg;
	data->size = nents;

	return 0;

err:
	for (; i > 0; i--)
		__free_page(sg_page(&sg[i - 1]));
	kfree(sg);

	return -ENOMEM;
}

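/*
 * Copy the payload between the original scatterlist (data->orig_sg) and
 * the bounce scatterlist (data->sg). Bounce entries are whole pages, so a
 * single original entry may span several bounce entries and vice versa;
 * osg_off and bsg_off track the progress within the current elements.
 */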
static void
iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
{
	struct scatterlist *osg, *bsg = data->sg;
	void *oaddr, *baddr;
	unsigned int left = data->data_len;
	unsigned int bsg_off = 0;
	int i;

	for_each_sg(data->orig_sg, osg, data->orig_size, i) {
		unsigned int copy_len, osg_off = 0;

		oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
		copy_len = min(left, osg->length);
		while (copy_len) {
			unsigned int len = min(copy_len, bsg->length - bsg_off);

			baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
			if (to_buffer)
				memcpy(baddr + bsg_off, oaddr + osg_off, len);
			else
				memcpy(oaddr + osg_off, baddr + bsg_off, len);

			kunmap_atomic(baddr - bsg->offset);
			osg_off += len;
			bsg_off += len;
			copy_len -= len;

			if (bsg_off >= bsg->length) {
				bsg = sg_next(bsg);
				bsg_off = 0;
			}
		}
		kunmap_atomic(oaddr - osg->offset);
		left -= osg_off;
	}
}

static inline void
iser_copy_from_bounce(struct iser_data_buf *data)
{
	iser_copy_bounce(data, false);
}

static inline void
iser_copy_to_bounce(struct iser_data_buf *data)
{
	iser_copy_bounce(data, true);
}

struct iser_fr_desc *
iser_reg_desc_get(struct ib_conn *ib_conn)
{
	struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
	struct iser_fr_desc *desc;
	unsigned long flags;

	spin_lock_irqsave(&fr_pool->lock, flags);
	desc = list_first_entry(&fr_pool->list,
				struct iser_fr_desc, list);
	list_del(&desc->list);
	spin_unlock_irqrestore(&fr_pool->lock, flags);

	return desc;
}

void
iser_reg_desc_put(struct ib_conn *ib_conn,
		  struct iser_fr_desc *desc)
{
	struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
	unsigned long flags;

	spin_lock_irqsave(&fr_pool->lock, flags);
	list_add(&desc->list, &fr_pool->list);
	spin_unlock_irqrestore(&fr_pool->lock, flags);
}

/**
 * iser_start_rdma_unaligned_sg - Bounce an unaligned scatterlist
 *
 * Allocates a page-aligned bounce scatterlist for @data, copies the
 * payload into it for writes (ISER_DIR_OUT) and DMA maps the bounce
 * buffer.
 */
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
					struct iser_data_buf *data,
					enum iser_data_dir cmd_dir)
{
	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
	int rc;

	rc = iser_alloc_bounce_sg(data);
	if (rc) {
		iser_err("Failed to allocate bounce for data len %lu\n",
			 data->data_len);
		return rc;
	}

	if (cmd_dir == ISER_DIR_OUT)
		iser_copy_to_bounce(data);

	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
					(cmd_dir == ISER_DIR_OUT) ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
	if (!data->dma_nents) {
		iser_err("Got dma_nents %d, something went wrong...\n",
			 data->dma_nents);
		rc = -ENOMEM;
		goto err;
	}

	return 0;
err:
	iser_free_bounce_sg(data);
	return rc;
}

/**
 * iser_finalize_rdma_unaligned_sg - Release a bounce buffer
 *
 * DMA unmaps the bounce scatterlist, copies the payload back to the
 * original scatterlist for reads (ISER_DIR_IN) and frees the bounce
 * pages.
 */
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
				     struct iser_data_buf *data,
				     enum iser_data_dir cmd_dir)
{
	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;

	ib_dma_unmap_sg(dev, data->sg, data->size,
			(cmd_dir == ISER_DIR_OUT) ?
			DMA_TO_DEVICE : DMA_FROM_DEVICE);

	if (cmd_dir == ISER_DIR_IN)
		iser_copy_from_bounce(data);

	iser_free_bounce_sg(data);
}

#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)

/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of the resulting physical address array (may be
 * less than the original due to possible compaction).
 *
 * We build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages. The code also supports the weird case
 * where several fragments of the same page are present in the SG as
 * consecutive elements. It also handles a single-entry SG.
 */
static int iser_sg_to_page_vec(struct iser_data_buf *data,
			       struct ib_device *ibdev, u64 *pages,
			       int *offset, int *data_size)
{
	struct scatterlist *sg, *sgl = data->sg;
	u64 start_addr, end_addr, page, chunk_start = 0;
	unsigned long total_sz = 0;
	unsigned int dma_len;
	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;

	/* compute the offset of first element */
	*offset = (u64) sgl[0].offset & ~MASK_4K;

	new_chunk = 1;
	cur_page  = 0;
	for_each_sg(sgl, sg, data->dma_nents, i) {
		start_addr = ib_sg_dma_address(ibdev, sg);
		if (new_chunk)
			chunk_start = start_addr;
		dma_len = ib_sg_dma_len(ibdev, sg);
		end_addr = start_addr + dma_len;
		total_sz += dma_len;

		/* collect page fragments until aligned or end of SG list */
		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
			new_chunk = 0;
			continue;
		}
		new_chunk = 1;

		/* address of the first page in the contiguous chunk;
		   masking relevant for the very first SG entry,
		   which might be unaligned */
		page = chunk_start & MASK_4K;
		do {
			pages[cur_page++] = page;
			page += SIZE_4K;
		} while (page < end_addr);
	}

	*data_size = total_sz;
	iser_dbg("page_vec->data_size:%d cur_page %d\n",
		 *data_size, cur_page);
	return cur_page;
}

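/*
 * Example for iser_sg_to_page_vec() above (hypothetical addresses): an SG
 * list of two mapped entries, 0x10000 of length 0x1000 and 0x11000 of
 * length 0x2000, yields offset 0, data_size 0x3000 and the page vec
 * { 0x10000, 0x11000, 0x12000 }.
 */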

/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers that is correctly aligned for RDMA,
 * and returns the number of entries which are aligned correctly. Supports
 * the case where consecutive SG elements are actually fragments of the same
 * physical page.
 */
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
				      struct ib_device *ibdev)
{
	struct scatterlist *sg, *sgl, *next_sg = NULL;
	u64 start_addr, end_addr;
	int i, ret_len, start_check = 0;

	if (data->dma_nents == 1)
		return 1;

	sgl = data->sg;
	start_addr  = ib_sg_dma_address(ibdev, sgl);

	for_each_sg(sgl, sg, data->dma_nents, i) {
		if (start_check && !IS_4K_ALIGNED(start_addr))
			break;

		next_sg = sg_next(sg);
		if (!next_sg)
			break;

		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
		start_addr  = ib_sg_dma_address(ibdev, next_sg);

		if (end_addr == start_addr) {
			start_check = 0;
			continue;
		} else
			start_check = 1;

		if (!IS_4K_ALIGNED(end_addr))
			break;
	}
	ret_len = (next_sg) ? i : i+1;

	if (unlikely(ret_len != data->dma_nents))
		iser_warn("rdma alignment violation (%d/%d aligned)\n",
			  ret_len, data->dma_nents);

	return ret_len;
}

static void iser_data_buf_dump(struct iser_data_buf *data,
			       struct ib_device *ibdev)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(data->sg, sg, data->dma_nents, i)
		iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
			 "off:0x%x sz:0x%x dma_len:0x%x\n",
			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
			 sg_page(sg), sg->offset,
			 sg->length, ib_sg_dma_len(ibdev, sg));
}

static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
	int i;

	iser_err("page vec length %d data size %d\n",
		 page_vec->length, page_vec->data_size);
	for (i = 0; i < page_vec->length; i++)
		iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
}

int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *data,
			    enum iser_data_dir iser_dir,
			    enum dma_data_direction dma_dir)
{
	struct ib_device *dev;

	iser_task->dir[iser_dir] = 1;
	dev = iser_task->iser_conn->ib_conn.device->ib_device;

	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
	if (data->dma_nents == 0) {
		iser_err("dma_map_sg failed!!!\n");
		return -EINVAL;
	}
	return 0;
}

void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *data,
			      enum dma_data_direction dir)
{
	struct ib_device *dev;

	dev = iser_task->iser_conn->ib_conn.device->ib_device;
	ib_dma_unmap_sg(dev, data->sg, data->size, dir);
}

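/*
 * iser_reg_dma - "register" a single-entry scatterlist by reusing the
 * device's global DMA MR (device->mr); no FMR or FastReg work request is
 * needed for this case.
 */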
static int
iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
	     struct iser_mem_reg *reg)
{
	struct scatterlist *sg = mem->sg;

	reg->sge.lkey = device->mr->lkey;
	reg->rkey = device->mr->rkey;
	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);

	iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
		 " length=0x%x\n", reg->sge.lkey, reg->rkey,
		 reg->sge.addr, reg->sge.length);

	return 0;
}

static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *mem,
			      enum iser_data_dir cmd_dir)
{
	struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
	struct iser_device *device = iser_task->iser_conn->ib_conn.device;

	iscsi_conn->fmr_unalign_cnt++;

	if (iser_debug_level > 0)
		iser_data_buf_dump(mem, device->ib_device);

	/* unmap the command data before accessing it */
	iser_dma_unmap_task_data(iser_task, mem,
				 (cmd_dir == ISER_DIR_OUT) ?
				 DMA_TO_DEVICE : DMA_FROM_DEVICE);

	/* allocate copy buf, if we are writing, copy the */
	/* unaligned scatterlist, dma map the copy        */
	if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
		return -ENOMEM;

	return 0;
}

/**
 * iser_fast_reg_fmr - Registers physical memory through the FMR pool
 *
 * returns: 0 on success, errno code on failure
 */
static
int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
		      struct iser_data_buf *mem,
		      struct iser_reg_resources *rsc,
		      struct iser_mem_reg *reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct iser_page_vec *page_vec = rsc->page_vec;
	struct ib_fmr_pool *fmr_pool = rsc->fmr_pool;
	struct ib_pool_fmr *fmr;
	int ret, plen;

	plen = iser_sg_to_page_vec(mem, device->ib_device,
				   page_vec->pages,
				   &page_vec->offset,
				   &page_vec->data_size);
	page_vec->length = plen;
	if (plen * SIZE_4K < page_vec->data_size) {
		iser_err("page vec too short to hold this SG\n");
		iser_data_buf_dump(mem, device->ib_device);
		iser_dump_page_vec(page_vec);
		return -EINVAL;
	}

	fmr  = ib_fmr_pool_map_phys(fmr_pool,
				    page_vec->pages,
				    page_vec->length,
				    page_vec->pages[0]);
	if (IS_ERR(fmr)) {
		ret = PTR_ERR(fmr);
		iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
		return ret;
	}

	reg->sge.lkey = fmr->fmr->lkey;
	reg->rkey = fmr->fmr->rkey;
	reg->sge.addr = page_vec->pages[0] + page_vec->offset;
	reg->sge.length = page_vec->data_size;
	reg->mem_h = fmr;

	return 0;
}

/**
 * Unregister memory previously registered using FMR.
 * If the memory was not FMR-registered, this does nothing.
 */
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
			enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
	int ret;

	if (!reg->mem_h)
		return;

	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

	ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
	if (ret)
		iser_err("ib_fmr_pool_unmap failed %d\n", ret);

	reg->mem_h = NULL;
}

void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
			    enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];

	if (!reg->mem_h)
		return;

	iser_reg_desc_put(&iser_task->iser_conn->ib_conn,
			  reg->mem_h);
	reg->mem_h = NULL;
}

/**
 * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
 * using FMR (if possible) obtaining rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
			  enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
	struct iser_device   *device = ib_conn->device;
	struct ib_device     *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_mem_reg *mem_reg;
	int aligned_len;
	int err;
	int i;

	mem_reg = &iser_task->rdma_reg[cmd_dir];

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(iser_task, mem, cmd_dir);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
	}

	/* if there is a single dma entry, FMR is not needed */
	if (mem->dma_nents == 1) {
		return iser_reg_dma(device, mem, mem_reg);
	} else { /* use FMR for multiple dma entries */
		struct iser_fr_desc *desc;

		desc = list_first_entry(&fr_pool->list,
					struct iser_fr_desc, list);
		err = iser_fast_reg_fmr(iser_task, mem, &desc->rsc, mem_reg);
		if (err && err != -EAGAIN) {
			iser_data_buf_dump(mem, ibdev);
			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
				 mem->dma_nents,
				 ntoh24(iser_task->desc.iscsi_header.dlength));
			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
				 desc->rsc.page_vec->data_size,
				 desc->rsc.page_vec->length,
				 desc->rsc.page_vec->offset);
			for (i = 0; i < desc->rsc.page_vec->length; i++)
				iser_err("page_vec[%d] = 0x%llx\n", i,
					 (unsigned long long)desc->rsc.page_vec->pages[i]);
		}
		if (err)
			return err;
	}
	return 0;
}

static void
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
		    struct ib_sig_domain *domain)
{
	domain->sig_type = IB_SIG_TYPE_T10_DIF;
	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
	/*
	 * At the moment we hard code those, but in the future
	 * we will take them from sc.
	 */
	domain->sig.dif.apptag_check_mask = 0xffff;
	domain->sig.dif.app_escape = true;
	domain->sig.dif.ref_escape = true;
	if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
		domain->sig.dif.ref_remap = true;
};

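/*
 * Translate the SCSI protection operation into signature attributes:
 * "mem" describes the layout of the local buffers and "wire" the layout
 * on the network; for INSERT/STRIP operations one side is left as
 * IB_SIG_TYPE_NONE so the HCA adds or strips the T10-PI fields.
 */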
static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
	switch (scsi_get_prot_op(sc)) {
	case SCSI_PROT_WRITE_INSERT:
	case SCSI_PROT_READ_STRIP:
		sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_INSERT:
	case SCSI_PROT_WRITE_STRIP:
		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_PASS:
	case SCSI_PROT_WRITE_PASS:
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	default:
		iser_err("Unsupported PI operation %d\n",
			 scsi_get_prot_op(sc));
		return -EINVAL;
	}

	return 0;
}

static inline void
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
	*mask = 0;
	if (sc->prot_flags & SCSI_PROT_REF_CHECK)
		*mask |= ISER_CHECK_REFTAG;
	if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
		*mask |= ISER_CHECK_GUARD;
}

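/*
 * iser_inv_rkey - build a LOCAL_INV work request for @mr and advance its
 * rkey, so a previously handed out rkey can no longer be used once the MR
 * is registered again.
 */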
static void
iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
	u32 rkey;

	memset(inv_wr, 0, sizeof(*inv_wr));
	inv_wr->opcode = IB_WR_LOCAL_INV;
	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
	inv_wr->ex.invalidate_rkey = mr->rkey;

	rkey = ib_inc_rkey(mr->rkey);
	ib_update_fast_reg_key(mr, rkey);
}

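/*
 * iser_reg_sig_mr - register a signature-enabled MR covering the data
 * (and, when present, protection) buffers. If the signature MR is not
 * marked valid, a LOCAL_INV WR is chained in front of the REG_SIG_MR WR
 * so both are posted with a single ib_post_send() call.
 */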
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
		struct iser_pi_context *pi_ctx,
		struct iser_mem_reg *data_reg,
		struct iser_mem_reg *prot_reg,
		struct iser_mem_reg *sig_reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct ib_send_wr sig_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	struct ib_sig_attrs sig_attrs;
	int ret;

	memset(&sig_attrs, 0, sizeof(sig_attrs));
	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
	if (ret)
		goto err;

	iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);

	if (!pi_ctx->sig_mr_valid) {
		iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
		wr = &inv_wr;
	}

	memset(&sig_wr, 0, sizeof(sig_wr));
	sig_wr.opcode = IB_WR_REG_SIG_MR;
	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
	sig_wr.sg_list = &data_reg->sge;
	sig_wr.num_sge = 1;
	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
	if (scsi_prot_sg_count(iser_task->sc))
		sig_wr.wr.sig_handover.prot = &prot_reg->sge;
	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
					      IB_ACCESS_REMOTE_READ |
					      IB_ACCESS_REMOTE_WRITE;

	if (!wr)
		wr = &sig_wr;
	else
		wr->next = &sig_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("reg_sig_mr failed, ret:%d\n", ret);
		goto err;
	}
	pi_ctx->sig_mr_valid = 0;

	sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
	sig_reg->rkey = pi_ctx->sig_mr->rkey;
	sig_reg->sge.addr = 0;
	sig_reg->sge.length = scsi_transfer_length(iser_task->sc);

	iser_dbg("sig_sge: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
		 sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
		 sig_reg->sge.length);
err:
	return ret;
}

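/*
 * iser_fast_reg_mr - register a scatterlist with a FastReg work request.
 * A single-entry SG is served by iser_reg_dma(); otherwise the SG is
 * converted into a page list and an IB_WR_FAST_REG_MR (prefixed by a
 * LOCAL_INV WR when the MR is not marked valid) is posted on the QP.
 */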
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *mem,
			    struct iser_reg_resources *rsc,
			    struct iser_mem_reg *reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *frpl;
	struct ib_send_wr fastreg_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	int ret, offset, size, plen;

	/* if there is a single dma entry, the dma mr suffices */
	if (mem->dma_nents == 1)
		return iser_reg_dma(device, mem, reg);

	mr = rsc->mr;
	frpl = rsc->frpl;

	plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
				   &offset, &size);
	if (plen * SIZE_4K < size) {
		iser_err("fast reg page_list too short to hold this SG\n");
		return -EINVAL;
	}

	if (!rsc->mr_valid) {
		iser_inv_rkey(&inv_wr, mr);
		wr = &inv_wr;
	}

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
	fastreg_wr.wr.fast_reg.page_list = frpl;
	fastreg_wr.wr.fast_reg.page_list_len = plen;
	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
	fastreg_wr.wr.fast_reg.length = size;
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
					       IB_ACCESS_REMOTE_WRITE |
					       IB_ACCESS_REMOTE_READ);

	if (!wr)
		wr = &fastreg_wr;
	else
		wr->next = &fastreg_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("fast registration failed, ret:%d\n", ret);
		return ret;
	}
	rsc->mr_valid = 0;

	reg->sge.lkey = mr->lkey;
	reg->rkey = mr->rkey;
	reg->sge.addr = frpl->page_list[0] + offset;
	reg->sge.length = size;

	return ret;
}

/**
 * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,
 * using Fast Registration WR (if possible) obtaining rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
			      enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_device *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
	struct iser_fr_desc *desc = NULL;
	int err, aligned_len;

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(iser_task, mem, cmd_dir);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
	}

	if (mem->dma_nents != 1 ||
	    scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		desc = iser_reg_desc_get(ib_conn);
		mem_reg->mem_h = desc;
	}

	err = iser_fast_reg_mr(iser_task, mem,
			       desc ? &desc->rsc : NULL, mem_reg);
	if (err)
		goto err_reg;

	if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		struct iser_mem_reg prot_reg;

		memset(&prot_reg, 0, sizeof(prot_reg));
		if (scsi_prot_sg_count(iser_task->sc)) {
			mem = &iser_task->prot[cmd_dir];
			aligned_len = iser_data_buf_aligned_len(mem, ibdev);
			if (aligned_len != mem->dma_nents) {
				err = fall_to_bounce_buf(iser_task, mem,
							 cmd_dir);
				if (err) {
					iser_err("failed to allocate bounce buffer\n");
					return err;
				}
			}

			err = iser_fast_reg_mr(iser_task, mem,
					       &desc->pi_ctx->rsc, &prot_reg);
			if (err)
				goto err_reg;
		}

		err = iser_reg_sig_mr(iser_task, desc->pi_ctx, mem_reg,
				      &prot_reg, mem_reg);
		if (err) {
			iser_err("Failed to register signature mr\n");
			return err;
		}
		desc->pi_ctx->sig_protected = 1;
	}

	return 0;
err_reg:
	if (desc)
		iser_reg_desc_put(ib_conn, desc);

	return err;
}