/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>

#include "iscsi_iser.h"

#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */

/**
 * iser_start_rdma_unaligned_sg - bounce an RDMA-unaligned scatterlist
 * @iser_task: iser task that owns the data buffer
 * @data:      data buffer to bounce
 * @cmd_dir:   data transfer direction
 *
 * Allocate a contiguous copy buffer, fill it from the original SG for
 * ISER_DIR_OUT, and DMA map it as a single-entry scatterlist in place
 * of the original one.
 */
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
					struct iser_data_buf *data,
					enum iser_data_dir cmd_dir)
{
	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
	struct scatterlist *sgl = data->sg;
	struct scatterlist *sg;
	char *mem = NULL;
	unsigned long  cmd_data_len = data->data_len;
	int dma_nents, i;

	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
		mem = (void *)__get_free_pages(GFP_ATOMIC,
		      ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
	else
		mem = kmalloc(cmd_data_len, GFP_ATOMIC);

	if (mem == NULL) {
		iser_err("Failed to allocate mem size %d %d for copying sglist\n",
			 data->size, (int)cmd_data_len);
		return -ENOMEM;
	}

	if (cmd_dir == ISER_DIR_OUT) {
		/* copy the unaligned sg into the buffer which is used for RDMA */
		char *p, *from;

		sgl = data->sg;
		p = mem;
		for_each_sg(sgl, sg, data->size, i) {
			from = kmap_atomic(sg_page(sg));
			memcpy(p,
			       from + sg->offset,
			       sg->length);
			kunmap_atomic(from);
			p += sg->length;
		}
	}

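	/* swap in a single-entry scatterlist that points at the bounce
	 * buffer and DMA map it in place of the original SG
	 */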
	sg_init_one(&data->sg_single, mem, cmd_data_len);
	data->orig_sg = data->sg;
	data->sg = &data->sg_single;
	data->copy_buf = mem;
	dma_nents = ib_dma_map_sg(dev, data->sg, 1,
				  (cmd_dir == ISER_DIR_OUT) ?
				  DMA_TO_DEVICE : DMA_FROM_DEVICE);
	BUG_ON(dma_nents == 0);

	data->dma_nents = dma_nents;

	return 0;
}

/**
 * iser_finalize_rdma_unaligned_sg - unmap and free the bounce buffer set
 * up by iser_start_rdma_unaligned_sg(), copying the received data back
 * into the original scatterlist for ISER_DIR_IN first.
 */

void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
				     struct iser_data_buf *data,
				     enum iser_data_dir cmd_dir)
{
	struct ib_device *dev;
	unsigned long  cmd_data_len;

	dev = iser_task->iser_conn->ib_conn.device->ib_device;

	ib_dma_unmap_sg(dev, data->sg, 1,
			(cmd_dir == ISER_DIR_OUT) ?
			DMA_TO_DEVICE : DMA_FROM_DEVICE);

	if (cmd_dir == ISER_DIR_IN) {
		char *mem;
		struct scatterlist *sgl, *sg;
		unsigned char *p, *to;
		unsigned int sg_size;
		int i;

		/* copy back read RDMA to unaligned sg */
		mem = data->copy_buf;

		sgl = data->sg;
		sg_size = data->size;

		p = mem;
		for_each_sg(sgl, sg, sg_size, i) {
			to = kmap_atomic(sg_page(sg));
			memcpy(to + sg->offset,
			       p,
			       sg->length);
			kunmap_atomic(to);
			p += sg->length;
		}
	}

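	/* free the bounce buffer through the allocator that matches the
	 * size-based choice made in iser_start_rdma_unaligned_sg()
	 */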
	cmd_data_len = data->data_len;

	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
		free_pages((unsigned long)data->copy_buf,
			   ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
	else
		kfree(data->copy_buf);

	data->copy_buf = NULL;
}

#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)

/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of resulting physical address array (may be less than
 * the original due to possible compaction).
 *
 * We build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * DMA addresses of physical pages. The code also supports the odd case
 * where several fragments of the same page appear in the SG as
 * consecutive elements, and it handles a single-entry SG as well.
 */
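/*
 * For example, a payload that the block layer split into several
 * physically contiguous SG entries collapses into a single run of page
 * addresses, one per 4K page covered, with *offset holding the byte
 * offset of the first entry within its page.
 */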

static int iser_sg_to_page_vec(struct iser_data_buf *data,
			       struct ib_device *ibdev, u64 *pages,
			       int *offset, int *data_size)
{
	struct scatterlist *sg, *sgl = data->sg;
	u64 start_addr, end_addr, page, chunk_start = 0;
	unsigned long total_sz = 0;
	unsigned int dma_len;
	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;

	/* compute the offset of first element */
	*offset = (u64) sgl[0].offset & ~MASK_4K;

	new_chunk = 1;
	cur_page  = 0;
	for_each_sg(sgl, sg, data->dma_nents, i) {
		start_addr = ib_sg_dma_address(ibdev, sg);
		if (new_chunk)
			chunk_start = start_addr;
		dma_len = ib_sg_dma_len(ibdev, sg);
		end_addr = start_addr + dma_len;
		total_sz += dma_len;

		/* collect page fragments until aligned or end of SG list */
		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
			new_chunk = 0;
			continue;
		}
		new_chunk = 1;

		/* address of the first page in the contiguous chunk;
		   masking relevant for the very first SG entry,
		   which might be unaligned */
		page = chunk_start & MASK_4K;
		do {
			pages[cur_page++] = page;
			page += SIZE_4K;
		} while (page < end_addr);
	}

	*data_size = total_sz;
	iser_dbg("page_vec->data_size:%d cur_page %d\n",
		 *data_size, cur_page);
	return cur_page;
}


/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers that is correctly aligned for RDMA,
 * and returns the number of correctly aligned entries. Supports the case
 * where consecutive SG elements are actually fragments of the same physical
 * page.
 */
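/*
 * An entry boundary is acceptable when the two elements either abut
 * exactly (e.g. fragments of the same page) or both end and start on a
 * 4K boundary; counting stops at the first boundary that violates this.
 */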
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
				      struct ib_device *ibdev)
{
	struct scatterlist *sg, *sgl, *next_sg = NULL;
	u64 start_addr, end_addr;
	int i, ret_len, start_check = 0;

	if (data->dma_nents == 1)
		return 1;

	sgl = data->sg;
	start_addr  = ib_sg_dma_address(ibdev, sgl);

	for_each_sg(sgl, sg, data->dma_nents, i) {
		if (start_check && !IS_4K_ALIGNED(start_addr))
			break;

		next_sg = sg_next(sg);
		if (!next_sg)
			break;

		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
		start_addr  = ib_sg_dma_address(ibdev, next_sg);

		if (end_addr == start_addr) {
			start_check = 0;
			continue;
		} else
			start_check = 1;

		if (!IS_4K_ALIGNED(end_addr))
			break;
	}
	ret_len = (next_sg) ? i : i+1;
	iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
		 ret_len, data->dma_nents, data);
	return ret_len;
}

static void iser_data_buf_dump(struct iser_data_buf *data,
			       struct ib_device *ibdev)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(data->sg, sg, data->dma_nents, i)
		iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
			 "off:0x%x sz:0x%x dma_len:0x%x\n",
			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
			 sg_page(sg), sg->offset,
			 sg->length, ib_sg_dma_len(ibdev, sg));
}

static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
	int i;

	iser_err("page vec length %d data size %d\n",
		 page_vec->length, page_vec->data_size);
	for (i = 0; i < page_vec->length; i++)
		iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
}

static void iser_page_vec_build(struct iser_data_buf *data,
				struct iser_page_vec *page_vec,
				struct ib_device *ibdev)
{
	int page_vec_len = 0;

	page_vec->length = 0;
	page_vec->offset = 0;

	iser_dbg("Translating sg sz: %d\n", data->dma_nents);
	page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
					   &page_vec->offset,
					   &page_vec->data_size);
	iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);

	page_vec->length = page_vec_len;

	if (page_vec_len * SIZE_4K < page_vec->data_size) {
		iser_err("page_vec too short to hold this SG\n");
		iser_data_buf_dump(data, ibdev);
		iser_dump_page_vec(page_vec);
		BUG();
	}
}

int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *data,
			    enum iser_data_dir iser_dir,
			    enum dma_data_direction dma_dir)
{
	struct ib_device *dev;

	iser_task->dir[iser_dir] = 1;
	dev = iser_task->iser_conn->ib_conn.device->ib_device;

	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
	if (data->dma_nents == 0) {
		iser_err("dma_map_sg failed!!!\n");
		return -EINVAL;
	}
	return 0;
}

void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *data,
			      enum dma_data_direction dir)
{
	struct ib_device *dev;

	dev = iser_task->iser_conn->ib_conn.device->ib_device;
	ib_dma_unmap_sg(dev, data->sg, data->size, dir);
}

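/*
 * fall_to_bounce_buf - the task data is not RDMA aligned (or FMR is not
 * supported), so unmap it and bounce it through a contiguous copy buffer
 * allocated by iser_start_rdma_unaligned_sg().
 */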
static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *mem,
			      enum iser_data_dir cmd_dir,
			      int aligned_len)
{
	struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
	struct iser_device *device = iser_task->iser_conn->ib_conn.device;

	iscsi_conn->fmr_unalign_cnt++;
	iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
		  aligned_len, mem->size);

	if (iser_debug_level > 0)
		iser_data_buf_dump(mem, device->ib_device);

	/* unmap the command data before accessing it */
	iser_dma_unmap_task_data(iser_task, mem,
				 (cmd_dir == ISER_DIR_OUT) ?
				 DMA_TO_DEVICE : DMA_FROM_DEVICE);

	/* allocate copy buf, if we are writing, copy the
	 * unaligned scatterlist, dma map the copy
	 */
	if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
		return -ENOMEM;

	return 0;
}

/**
 * iser_reg_page_vec - Register physical memory
 *
 * returns: 0 on success, errno code on failure
 */
static
int iser_reg_page_vec(struct ib_conn *ib_conn,
		      struct iser_page_vec *page_vec,
		      struct iser_mem_reg  *mem_reg)
{
	struct ib_pool_fmr *mem;
	u64		   io_addr;
	u64		   *page_list;
	int		   status;

	page_list = page_vec->pages;
	io_addr	  = page_list[0];

	mem  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
				    page_list,
				    page_vec->length,
				    io_addr);

	if (IS_ERR(mem)) {
		status = (int)PTR_ERR(mem);
		iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
		return status;
	}

	mem_reg->lkey  = mem->fmr->lkey;
	mem_reg->rkey  = mem->fmr->rkey;
	mem_reg->len   = page_vec->data_size;
	mem_reg->va    = io_addr + page_vec->offset;
	mem_reg->mem_h = (void *)mem;

	iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
		 "entry[0]: (0x%08lx,%ld)] -> "
		 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
		 page_vec, page_vec->length,
		 (unsigned long)page_vec->pages[0],
		 (unsigned long)page_vec->data_size,
		 (unsigned int)mem_reg->lkey, mem_reg->mem_h,
		 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
	return 0;
}

/**
 * Unregister memory (previously registered using FMR).
 * If the memory was not registered through FMR, this routine does nothing.
 */
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
			enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
	int ret;

	if (!reg->mem_h)
		return;

	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

	ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
	if (ret)
		iser_err("ib_fmr_pool_unmap failed %d\n", ret);

	reg->mem_h = NULL;
}

void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
			    enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
	struct iser_conn *iser_conn = iser_task->iser_conn;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct fast_reg_descriptor *desc = reg->mem_h;

	if (!desc)
		return;

	reg->mem_h = NULL;
	spin_lock_bh(&ib_conn->lock);
	list_add_tail(&desc->list, &ib_conn->fastreg.pool);
	spin_unlock_bh(&ib_conn->lock);
}

/**
 * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
 * using FMR (if possible), obtaining the rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
			  enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device   *device = ib_conn->device;
	struct ib_device     *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_mem_reg *mem_reg;
	int aligned_len;
	int err;
	int i;
	struct scatterlist *sg;

	mem_reg = &iser_task->rdma_reg[cmd_dir];

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(iser_task, mem,
					 cmd_dir, aligned_len);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
	}

	/* if there is a single dma entry, FMR is not needed */
	if (mem->dma_nents == 1) {
		sg = mem->sg;

		mem_reg->lkey = device->mr->lkey;
		mem_reg->rkey = device->mr->rkey;
		mem_reg->len  = ib_sg_dma_len(ibdev, &sg[0]);
		mem_reg->va   = ib_sg_dma_address(ibdev, &sg[0]);

		iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
			 "va: 0x%08lX sz: %ld]\n",
			 (unsigned int)mem_reg->lkey,
			 (unsigned int)mem_reg->rkey,
			 (unsigned long)mem_reg->va,
			 (unsigned long)mem_reg->len);
	} else { /* use FMR for multiple dma entries */
		iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev);
		err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec,
					mem_reg);
		if (err && err != -EAGAIN) {
			iser_data_buf_dump(mem, ibdev);
			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
				 mem->dma_nents,
				 ntoh24(iser_task->desc.iscsi_header.dlength));
			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
				 ib_conn->fmr.page_vec->data_size,
				 ib_conn->fmr.page_vec->length,
				 ib_conn->fmr.page_vec->offset);
			for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
				iser_err("page_vec[%d] = 0x%llx\n", i,
					 (unsigned long long)ib_conn->fmr.page_vec->pages[i]);
		}
		if (err)
			return err;
	}
	return 0;
}

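/*
 * T10-PI support: iser_set_dif_domain() fills one ib_sig_domain (wire or
 * memory) from the SCSI command, and iser_set_sig_attrs() below decides
 * which side carries the DIF according to the SCSI protection operation.
 */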
static void
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
		    struct ib_sig_domain *domain)
{
	domain->sig_type = IB_SIG_TYPE_T10_DIF;
	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
	/*
	 * At the moment we hard code those, but in the future
	 * we will take them from sc.
	 */
	domain->sig.dif.apptag_check_mask = 0xffff;
	domain->sig.dif.app_escape = true;
	domain->sig.dif.ref_escape = true;
	if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
		domain->sig.dif.ref_remap = true;
}

static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
	switch (scsi_get_prot_op(sc)) {
	case SCSI_PROT_WRITE_INSERT:
	case SCSI_PROT_READ_STRIP:
		sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_INSERT:
	case SCSI_PROT_WRITE_STRIP:
		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_PASS:
	case SCSI_PROT_WRITE_PASS:
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	default:
		iser_err("Unsupported PI operation %d\n",
			 scsi_get_prot_op(sc));
		return -EINVAL;
	}

	return 0;
}

static inline void
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
	*mask = 0;
	if (sc->prot_flags & SCSI_PROT_REF_CHECK)
		*mask |= ISER_CHECK_REFTAG;
	if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
		*mask |= ISER_CHECK_GUARD;
}

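/*
 * iser_inv_rkey - prepare a local invalidate WR for @mr and advance its
 * rkey so that the next fast registration uses a fresh key.
 */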
static void
iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
	u32 rkey;

	memset(inv_wr, 0, sizeof(*inv_wr));
	inv_wr->opcode = IB_WR_LOCAL_INV;
	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
	inv_wr->ex.invalidate_rkey = mr->rkey;

	rkey = ib_inc_rkey(mr->rkey);
	ib_update_fast_reg_key(mr, rkey);
}

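/*
 * iser_reg_sig_mr - register a signature-enabled MR covering the data
 * (and, when present, protection) SGEs.  If the descriptor's signature
 * key is no longer marked valid, a LOCAL_INV WR is chained in front of
 * the REG_SIG_MR WR to refresh it first.
 */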
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
		struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
		struct ib_sge *prot_sge, struct ib_sge *sig_sge)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_pi_context *pi_ctx = desc->pi_ctx;
	struct ib_send_wr sig_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	struct ib_sig_attrs sig_attrs;
	int ret;

	memset(&sig_attrs, 0, sizeof(sig_attrs));
	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
	if (ret)
		goto err;

	iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);

	if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
		iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
		wr = &inv_wr;
	}

	memset(&sig_wr, 0, sizeof(sig_wr));
	sig_wr.opcode = IB_WR_REG_SIG_MR;
	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
	sig_wr.sg_list = data_sge;
	sig_wr.num_sge = 1;
	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
	if (scsi_prot_sg_count(iser_task->sc))
		sig_wr.wr.sig_handover.prot = prot_sge;
	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
					      IB_ACCESS_REMOTE_READ |
					      IB_ACCESS_REMOTE_WRITE;

	if (!wr)
		wr = &sig_wr;
	else
		wr->next = &sig_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("reg_sig_mr failed, ret:%d\n", ret);
		goto err;
	}
	desc->reg_indicators &= ~ISER_SIG_KEY_VALID;

	sig_sge->lkey = pi_ctx->sig_mr->lkey;
	sig_sge->addr = 0;
	sig_sge->length = scsi_transfer_length(iser_task->sc);

	iser_dbg("sig_sge: addr: 0x%llx  length: %u lkey: 0x%x\n",
		 sig_sge->addr, sig_sge->length,
		 sig_sge->lkey);
err:
	return ret;
}

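/*
 * iser_fast_reg_mr - register @mem with a fast registration WR.  A single
 * DMA entry is passed through using the device DMA MR; otherwise the SG
 * is converted to a page list and posted as IB_WR_FAST_REG_MR, preceded
 * by a LOCAL_INV WR when the corresponding key is no longer valid.
 */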
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
			    struct iser_mem_reg *mem_reg,
			    struct iser_data_buf *mem,
			    enum iser_reg_indicator ind,
			    struct ib_sge *sge)
{
	struct fast_reg_descriptor *desc = mem_reg->mem_h;
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_device *ibdev = device->ib_device;
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *frpl;
	struct ib_send_wr fastreg_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	int ret, offset, size, plen;

	/* if there is a single dma entry, the dma mr suffices */
	if (mem->dma_nents == 1) {
		struct scatterlist *sg = mem->sg;

		sge->lkey = device->mr->lkey;
		sge->addr   = ib_sg_dma_address(ibdev, &sg[0]);
		sge->length  = ib_sg_dma_len(ibdev, &sg[0]);

		iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
			 sge->lkey, sge->addr, sge->length);
		return 0;
	}

	if (ind == ISER_DATA_KEY_VALID) {
		mr = desc->data_mr;
		frpl = desc->data_frpl;
	} else {
		mr = desc->pi_ctx->prot_mr;
		frpl = desc->pi_ctx->prot_frpl;
	}

	plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
				   &offset, &size);
	if (plen * SIZE_4K < size) {
		iser_err("fast reg page_list too short to hold this SG\n");
		return -EINVAL;
	}

	if (!(desc->reg_indicators & ind)) {
		iser_inv_rkey(&inv_wr, mr);
		wr = &inv_wr;
	}

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
	fastreg_wr.wr.fast_reg.page_list = frpl;
	fastreg_wr.wr.fast_reg.page_list_len = plen;
	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
	fastreg_wr.wr.fast_reg.length = size;
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
					       IB_ACCESS_REMOTE_WRITE |
					       IB_ACCESS_REMOTE_READ);

	if (!wr)
		wr = &fastreg_wr;
	else
		wr->next = &fastreg_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("fast registration failed, ret:%d\n", ret);
		return ret;
	}
	desc->reg_indicators &= ~ind;

	sge->lkey = mr->lkey;
	sge->addr = frpl->page_list[0] + offset;
	sge->length = size;

	return ret;
}

/**
 * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,
 * using a Fast Registration WR (if possible), obtaining the rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
			      enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_device *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
	struct fast_reg_descriptor *desc = NULL;
	struct ib_sge data_sge;
	int err, aligned_len;
	unsigned long flags;

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(iser_task, mem,
					 cmd_dir, aligned_len);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
	}

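	/* a fastreg descriptor is needed unless a single DMA entry can be
	 * passed through under the device MR; protected commands always
	 * take one for the signature MR
	 */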
	if (mem->dma_nents != 1 ||
	    scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		spin_lock_irqsave(&ib_conn->lock, flags);
		desc = list_first_entry(&ib_conn->fastreg.pool,
					struct fast_reg_descriptor, list);
		list_del(&desc->list);
		spin_unlock_irqrestore(&ib_conn->lock, flags);
		mem_reg->mem_h = desc;
	}

	err = iser_fast_reg_mr(iser_task, mem_reg, mem,
			       ISER_DATA_KEY_VALID, &data_sge);
	if (err)
		goto err_reg;

	if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		struct ib_sge prot_sge, sig_sge;

		memset(&prot_sge, 0, sizeof(prot_sge));
		if (scsi_prot_sg_count(iser_task->sc)) {
			mem = &iser_task->prot[cmd_dir];
			aligned_len = iser_data_buf_aligned_len(mem, ibdev);
			if (aligned_len != mem->dma_nents) {
				err = fall_to_bounce_buf(iser_task, mem,
							 cmd_dir, aligned_len);
				if (err) {
					iser_err("failed to allocate bounce buffer\n");
					return err;
				}
			}

			err = iser_fast_reg_mr(iser_task, mem_reg, mem,
					       ISER_PROT_KEY_VALID, &prot_sge);
			if (err)
				goto err_reg;
		}

		err = iser_reg_sig_mr(iser_task, desc, &data_sge,
				      &prot_sge, &sig_sge);
		if (err) {
			iser_err("Failed to register signature mr\n");
			return err;
		}
		desc->reg_indicators |= ISER_FASTREG_PROTECTED;

		mem_reg->lkey = sig_sge.lkey;
		mem_reg->rkey = desc->pi_ctx->sig_mr->rkey;
		mem_reg->va = sig_sge.addr;
		mem_reg->len = sig_sge.length;
	} else {
		if (desc)
			mem_reg->rkey = desc->data_mr->rkey;
		else
			mem_reg->rkey = device->mr->rkey;

		mem_reg->lkey = data_sge.lkey;
		mem_reg->va = data_sge.addr;
		mem_reg->len = data_sge.length;
	}

	return 0;
err_reg:
	if (desc) {
		spin_lock_irqsave(&ib_conn->lock, flags);
		list_add_tail(&desc->list, &ib_conn->fastreg.pool);
		spin_unlock_irqrestore(&ib_conn->lock, flags);
	}

	return err;
}