iser_verbs.c
/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include "iscsi_iser.h"

#define ISCSI_ISER_MAX_CONN	8
#define ISER_MAX_RX_CQ_LEN	(ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_TX_CQ_LEN	(ISER_QP_MAX_REQ_DTOS  * ISCSI_ISER_MAX_CONN)

static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);

static void iser_cq_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got cq event %d \n", cause->event);
}

static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got qp event %d\n",cause->event);
}

static void iser_event_handler(struct ib_event_handler *handler,
				struct ib_event *event)
{
	iser_err("async event %d on device %s port %d\n", event->event,
		event->device->name, event->element.port_num);
}

/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapter.
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_device_ib_res(struct iser_device *device)
{
	struct iser_cq_desc *cq_desc;
	struct ib_device_attr *dev_attr = &device->dev_attr;
	int ret, i;

	ret = ib_query_device(device->ib_device, dev_attr);
	if (ret) {
		pr_warn("Query device failed for %s\n", device->ib_device->name);
		return ret;
	}

	/* Assign function handles  - based on FMR support */
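	/* a brief note on the choice below: FMR is the legacy registration
	 * scheme; devices advertising IB_DEVICE_MEM_MGT_EXTENSIONS use
	 * fast registration (FastReg) work requests instead */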
	if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
	    device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
		iser_info("FMR supported, using FMR for registration\n");
		device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
		device->iser_free_rdma_reg_res = iser_free_fmr_pool;
		device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
		device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
	} else
	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
		iser_info("FastReg supported, using FastReg for registration\n");
		device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool;
		device->iser_free_rdma_reg_res = iser_free_fastreg_pool;
		device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg;
		device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg;
	} else {
		iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
		return -1;
	}

	device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
	iser_info("using %d CQs, device %s supports %d vectors\n",
		  device->cqs_used, device->ib_device->name,
		  device->ib_device->num_comp_vectors);
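	/* one rx/tx CQ pair and one tasklet are set up per completion
	 * vector (capped at ISER_MAX_CQ); QPs are later spread across
	 * them by iser_create_ib_conn_res() */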

	device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
				  GFP_KERNEL);
	if (device->cq_desc == NULL)
		goto cq_desc_err;
	cq_desc = device->cq_desc;

	device->pd = ib_alloc_pd(device->ib_device);
	if (IS_ERR(device->pd))
		goto pd_err;

	for (i = 0; i < device->cqs_used; i++) {
		cq_desc[i].device   = device;
		cq_desc[i].cq_index = i;

		device->rx_cq[i] = ib_create_cq(device->ib_device,
					  iser_cq_callback,
					  iser_cq_event_callback,
					  (void *)&cq_desc[i],
					  ISER_MAX_RX_CQ_LEN, i);
		if (IS_ERR(device->rx_cq[i])) {
			device->rx_cq[i] = NULL;
			goto cq_err;
		}

		device->tx_cq[i] = ib_create_cq(device->ib_device,
					  NULL, iser_cq_event_callback,
					  (void *)&cq_desc[i],
					  ISER_MAX_TX_CQ_LEN, i);

		if (IS_ERR(device->tx_cq[i])) {
			device->tx_cq[i] = NULL;
			goto cq_err;
		}

		if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
			goto cq_err;

		tasklet_init(&device->cq_tasklet[i],
			     iser_cq_tasklet_fn,
			(unsigned long)&cq_desc[i]);
	}

	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ);
	if (IS_ERR(device->mr))
		goto dma_mr_err;

	INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
				iser_event_handler);
	if (ib_register_event_handler(&device->event_handler))
		goto handler_err;

	return 0;

handler_err:
	ib_dereg_mr(device->mr);
dma_mr_err:
	for (i = 0; i < device->cqs_used; i++)
		tasklet_kill(&device->cq_tasklet[i]);
cq_err:
	for (i = 0; i < device->cqs_used; i++) {
		if (device->tx_cq[i])
			ib_destroy_cq(device->tx_cq[i]);
		if (device->rx_cq[i])
			ib_destroy_cq(device->rx_cq[i]);
	}
	ib_dealloc_pd(device->pd);
pd_err:
	kfree(device->cq_desc);
cq_desc_err:
	iser_err("failed to allocate an IB resource\n");
	return -1;
}

/**
 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
 * CQ and PD created with the device associated with the adapter.
 */
static void iser_free_device_ib_res(struct iser_device *device)
{
	int i;
	BUG_ON(device->mr == NULL);

	for (i = 0; i < device->cqs_used; i++) {
		tasklet_kill(&device->cq_tasklet[i]);
		(void)ib_destroy_cq(device->tx_cq[i]);
		(void)ib_destroy_cq(device->rx_cq[i]);
		device->tx_cq[i] = NULL;
		device->rx_cq[i] = NULL;
	}

	(void)ib_unregister_event_handler(&device->event_handler);
	(void)ib_dereg_mr(device->mr);
	(void)ib_dealloc_pd(device->pd);

	kfree(device->cq_desc);

	device->mr = NULL;
	device->pd = NULL;
}

/**
 * iser_create_fmr_pool - Creates FMR pool and page_vector
 *
 * returns 0 on success, or errno code on failure
 */
int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
{
	struct iser_device *device = ib_conn->device;
	struct ib_fmr_pool_param params;
	int ret = -ENOMEM;

	ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) +
					(sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)),
					GFP_KERNEL);
	if (!ib_conn->fmr.page_vec)
		return ret;

	ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1);

	params.page_shift        = SHIFT_4K;
	/* when the first/last SG elements are not start/end *
	 * page aligned, the map would be of N+1 pages       */
	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
	/* make the pool size twice the max number of SCSI commands *
	 * the ML is expected to queue, watermark for unmap at 50%  */
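	/* e.g. with cmds_max = 128 the pool below holds 256 FMRs and
	 * unmapping of dirty FMRs starts once 128 of them are in use */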
	params.pool_size	 = cmds_max * 2;
	params.dirty_watermark	 = cmds_max;
	params.cache		 = 0;
	params.flush_function	 = NULL;
	params.access		 = (IB_ACCESS_LOCAL_WRITE  |
				    IB_ACCESS_REMOTE_WRITE |
				    IB_ACCESS_REMOTE_READ);

	ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, &params);
	if (!IS_ERR(ib_conn->fmr.pool))
		return 0;

	/* no FMR => no need for page_vec */
	kfree(ib_conn->fmr.page_vec);
	ib_conn->fmr.page_vec = NULL;

	ret = PTR_ERR(ib_conn->fmr.pool);
	ib_conn->fmr.pool = NULL;
	if (ret != -ENOSYS) {
		iser_err("FMR allocation failed, err %d\n", ret);
		return ret;
	} else {
		iser_warn("FMRs are not supported, using unaligned mode\n");
		return 0;
	}
}

/**
 * iser_free_fmr_pool - releases the FMR pool and page vec
 */
void iser_free_fmr_pool(struct iser_conn *ib_conn)
{
	iser_info("freeing conn %p fmr pool %p\n",
		  ib_conn, ib_conn->fmr.pool);

	if (ib_conn->fmr.pool != NULL)
		ib_destroy_fmr_pool(ib_conn->fmr.pool);

	ib_conn->fmr.pool = NULL;

	kfree(ib_conn->fmr.page_vec);
	ib_conn->fmr.page_vec = NULL;
}

static int
iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
			 bool pi_enable, struct fast_reg_descriptor *desc)
{
	int ret;

	desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
						      ISCSI_ISER_SG_TABLESIZE + 1);
	if (IS_ERR(desc->data_frpl)) {
		ret = PTR_ERR(desc->data_frpl);
		iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
			 ret);
		return PTR_ERR(desc->data_frpl);
	}

	desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);
	if (IS_ERR(desc->data_mr)) {
		ret = PTR_ERR(desc->data_mr);
		iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
		goto fast_reg_mr_failure;
	}
	desc->reg_indicators |= ISER_DATA_KEY_VALID;

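	/* T10-PI offload needs extra per-descriptor resources: a second
	 * fast-reg MR and page list for the protection (DIF) buffer and
	 * a signature-enabled MR that ties data and protection together */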
	if (pi_enable) {
		struct ib_mr_init_attr mr_init_attr = {0};
		struct iser_pi_context *pi_ctx = NULL;

		desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
		if (!desc->pi_ctx) {
			iser_err("Failed to allocate pi context\n");
			ret = -ENOMEM;
			goto pi_ctx_alloc_failure;
		}
		pi_ctx = desc->pi_ctx;

		pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
						    ISCSI_ISER_SG_TABLESIZE);
		if (IS_ERR(pi_ctx->prot_frpl)) {
			ret = PTR_ERR(pi_ctx->prot_frpl);
			iser_err("Failed to allocate prot frpl ret=%d\n",
				 ret);
			goto prot_frpl_failure;
		}

		pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
						ISCSI_ISER_SG_TABLESIZE + 1);
		if (IS_ERR(pi_ctx->prot_mr)) {
			ret = PTR_ERR(pi_ctx->prot_mr);
			iser_err("Failed to allocate prot frmr ret=%d\n",
				 ret);
			goto prot_mr_failure;
		}
		desc->reg_indicators |= ISER_PROT_KEY_VALID;

		mr_init_attr.max_reg_descriptors = 2;
		mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
		pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
		if (IS_ERR(pi_ctx->sig_mr)) {
			ret = PTR_ERR(pi_ctx->sig_mr);
			iser_err("Failed to allocate signature enabled mr err=%d\n",
				 ret);
			goto sig_mr_failure;
		}
		desc->reg_indicators |= ISER_SIG_KEY_VALID;
	}
	desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;

	iser_dbg("Create fr_desc %p page_list %p\n",
		 desc, desc->data_frpl->page_list);

	return 0;
sig_mr_failure:
	ib_dereg_mr(desc->pi_ctx->prot_mr);
prot_mr_failure:
	ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
prot_frpl_failure:
	kfree(desc->pi_ctx);
pi_ctx_alloc_failure:
	ib_dereg_mr(desc->data_mr);
fast_reg_mr_failure:
	ib_free_fast_reg_page_list(desc->data_frpl);

	return ret;
}

/**
 * iser_create_fastreg_pool - Creates pool of fast_reg descriptors
 * for fast registration work requests.
 * returns 0 on success, or errno code on failure
 */
int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max)
{
	struct iser_device	*device = ib_conn->device;
	struct fast_reg_descriptor	*desc;
	int i, ret;

	INIT_LIST_HEAD(&ib_conn->fastreg.pool);
	ib_conn->fastreg.pool_size = 0;
	for (i = 0; i < cmds_max; i++) {
		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
		if (!desc) {
			iser_err("Failed to allocate a new fast_reg descriptor\n");
			ret = -ENOMEM;
			goto err;
		}

		ret = iser_create_fastreg_desc(device->ib_device, device->pd,
					       ib_conn->pi_support, desc);
		if (ret) {
			iser_err("Failed to create fastreg descriptor err=%d\n",
				 ret);
			kfree(desc);
			goto err;
		}

		list_add_tail(&desc->list, &ib_conn->fastreg.pool);
		ib_conn->fastreg.pool_size++;
	}

	return 0;

err:
	iser_free_fastreg_pool(ib_conn);
	return ret;
}

/**
 * iser_free_fastreg_pool - releases the pool of fast_reg descriptors
 */
void iser_free_fastreg_pool(struct iser_conn *ib_conn)
{
	struct fast_reg_descriptor *desc, *tmp;
	int i = 0;

	if (list_empty(&ib_conn->fastreg.pool))
		return;

	iser_info("freeing conn %p fr pool\n", ib_conn);

	list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
		list_del(&desc->list);
		ib_free_fast_reg_page_list(desc->data_frpl);
		ib_dereg_mr(desc->data_mr);
		if (desc->pi_ctx) {
			ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
			ib_dereg_mr(desc->pi_ctx->prot_mr);
			ib_destroy_mr(desc->pi_ctx->sig_mr);
			kfree(desc->pi_ctx);
		}
		kfree(desc);
		++i;
	}

	if (i < ib_conn->fastreg.pool_size)
		iser_warn("pool still has %d regions registered\n",
			  ib_conn->fastreg.pool_size - i);
}

/**
 * iser_create_ib_conn_res - creates Queue-Pair (QP)
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
	struct iser_device	*device;
	struct ib_qp_init_attr	init_attr;
	int			ret = -ENOMEM;
	int index, min_index = 0;

	BUG_ON(ib_conn->device == NULL);

	device = ib_conn->device;

	memset(&init_attr, 0, sizeof init_attr);

	mutex_lock(&ig.connlist_mutex);
	/* select the CQ with the minimal number of usages */
	for (index = 0; index < device->cqs_used; index++)
		if (device->cq_active_qps[index] <
		    device->cq_active_qps[min_index])
			min_index = index;
	device->cq_active_qps[min_index]++;
	mutex_unlock(&ig.connlist_mutex);
	iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn);

	init_attr.event_handler = iser_qp_event_callback;
	init_attr.qp_context	= (void *)ib_conn;
	init_attr.send_cq	= device->tx_cq[min_index];
	init_attr.recv_cq	= device->rx_cq[min_index];
	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.sq_sig_type	= IB_SIGNAL_REQ_WR;
	init_attr.qp_type	= IB_QPT_RC;
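	/* a signature-enabled QP posts extra registration work requests
	 * per command, so it gets the larger ISER_QP_SIG_MAX_REQ_DTOS
	 * send queue depth */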
	if (ib_conn->pi_support) {
		init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS;
		init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
	} else {
		init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
	}

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto out_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	iser_info("setting conn %p cma_id %p qp %p\n",
		  ib_conn, ib_conn->cma_id,
		  ib_conn->cma_id->qp);
	return ret;

out_err:
	iser_err("unable to alloc mem or create resource, err %d\n", ret);
	return ret;
}

/**
 * releases the QP object
 */
static void iser_free_ib_conn_res(struct iser_conn *ib_conn)
{
	int cq_index;
	BUG_ON(ib_conn == NULL);

	iser_info("freeing conn %p cma_id %p qp %p\n",
		  ib_conn, ib_conn->cma_id,
		  ib_conn->qp);

	/* qp is created only once both addr & route are resolved */

	if (ib_conn->qp != NULL) {
		cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
		ib_conn->device->cq_active_qps[cq_index]--;

		rdma_destroy_qp(ib_conn->cma_id);
	}

	ib_conn->qp	  = NULL;
}

/**
 * based on the resolved device node GUID, see if there is already an
 * allocated device for this IB device. If there's no such, create one.
 */
static
struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;

	mutex_lock(&ig.device_list_mutex);

	list_for_each_entry(device, &ig.device_list, ig_list)
		/* find if there's a match using the node GUID */
		if (device->ib_device->node_guid == cma_id->device->node_guid)
			goto inc_refcnt;

	device = kzalloc(sizeof *device, GFP_KERNEL);
	if (device == NULL)
		goto out;

	/* assign the IB device to this iser device */
	device->ib_device = cma_id->device;
	/* init the device and link it into ig device list */
	if (iser_create_device_ib_res(device)) {
		kfree(device);
		device = NULL;
		goto out;
	}
	list_add(&device->ig_list, &ig.device_list);

inc_refcnt:
	device->refcount++;
out:
	mutex_unlock(&ig.device_list_mutex);
	return device;
}

/* if there's no demand for this device, release it */
static void iser_device_try_release(struct iser_device *device)
{
	mutex_lock(&ig.device_list_mutex);
	device->refcount--;
	iser_info("device %p refcount %d\n", device, device->refcount);
	if (!device->refcount) {
		iser_free_device_ib_res(device);
		list_del(&device->ig_list);
		kfree(device);
	}
	mutex_unlock(&ig.device_list_mutex);
}

/**
 * Called with state mutex held
 **/
static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
				     enum iser_ib_conn_state comp,
				     enum iser_ib_conn_state exch)
{
	int ret;

	if ((ret = (ib_conn->state == comp)))
		ib_conn->state = exch;
	return ret;
}

void iser_release_work(struct work_struct *work)
{
	struct iser_conn *ib_conn;
	int rc;

	ib_conn = container_of(work, struct iser_conn, release_work);

	/* wait for .conn_stop callback */
	rc = wait_for_completion_timeout(&ib_conn->stop_completion, 30 * HZ);
	WARN_ON(rc == 0);

	/* wait for the qp's post send and post receive buffers to empty */
	rc = wait_for_completion_timeout(&ib_conn->flush_completion, 30 * HZ);
	WARN_ON(rc == 0);

	mutex_lock(&ib_conn->state_mutex);
	ib_conn->state = ISER_CONN_DOWN;
	mutex_unlock(&ib_conn->state_mutex);

	iser_conn_release(ib_conn);
}

/**
 * Frees all conn objects and deallocs conn descriptor
 */
void iser_conn_release(struct iser_conn *ib_conn)
{
	struct iser_device  *device = ib_conn->device;

	mutex_lock(&ig.connlist_mutex);
	list_del(&ib_conn->conn_list);
	mutex_unlock(&ig.connlist_mutex);

	mutex_lock(&ib_conn->state_mutex);
	BUG_ON(ib_conn->state != ISER_CONN_DOWN);

	iser_free_rx_descriptors(ib_conn);
	iser_free_ib_conn_res(ib_conn);
	ib_conn->device = NULL;
	/* on EVENT_ADDR_ERROR there's no device yet for this conn */
	if (device != NULL)
		iser_device_try_release(device);
	mutex_unlock(&ib_conn->state_mutex);

	/* if cma handler context, the caller actually destroys the id */
	if (ib_conn->cma_id != NULL) {
		rdma_destroy_id(ib_conn->cma_id);
		ib_conn->cma_id = NULL;
	}
	kfree(ib_conn);
}

/**
 * triggers start of the disconnect procedures and wait for them to be done
 */
void iser_conn_terminate(struct iser_conn *ib_conn)
{
	int err = 0;

	/* change the ib conn state only if the conn is UP, however always call
	 * rdma_disconnect since this is the only way to cause the CMA to change
	 * the QP state to ERROR
	 */

	iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING);
	err = rdma_disconnect(ib_conn->cma_id);
	if (err)
		iser_err("Failed to disconnect, conn: 0x%p err %d\n",
			 ib_conn, err);
}

/**
 * Called with state mutex held
 **/
static void iser_connect_error(struct rdma_cm_id *cma_id)
{
	struct iser_conn *ib_conn;

	ib_conn = (struct iser_conn *)cma_id->context;
	ib_conn->state = ISER_CONN_DOWN;
}

/**
 * Called with state mutex held
 **/
static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;
	struct iser_conn   *ib_conn;
	int    ret;

	ib_conn = (struct iser_conn *)cma_id->context;
	if (ib_conn->state != ISER_CONN_PENDING)
		/* bailout */
		return;

	device = iser_device_find_by_ib_device(cma_id);
	if (!device) {
		iser_err("device lookup/creation failed\n");
		iser_connect_error(cma_id);
		return;
	}

	ib_conn->device = device;

	/* connection T10-PI support */
	if (iser_pi_enable) {
		if (!(device->dev_attr.device_cap_flags &
		      IB_DEVICE_SIGNATURE_HANDOVER)) {
			iser_warn("T10-PI requested but not supported on %s, "
				  "continue without T10-PI\n",
				  ib_conn->device->ib_device->name);
			ib_conn->pi_support = false;
		} else {
			ib_conn->pi_support = true;
		}
	}

	ret = rdma_resolve_route(cma_id, 1000);
	if (ret) {
		iser_err("resolve route failed: %d\n", ret);
		iser_connect_error(cma_id);
		return;
	}
}

/**
 * Called with state mutex held
 **/
static void iser_route_handler(struct rdma_cm_id *cma_id)
{
	struct rdma_conn_param conn_param;
	int    ret;
	struct iser_cm_hdr req_hdr;
	struct iser_conn *ib_conn = (struct iser_conn *)cma_id->context;
	struct iser_device *device = ib_conn->device;

	if (ib_conn->state != ISER_CONN_PENDING)
		/* bailout */
		return;

	ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
	if (ret)
		goto failure;

	memset(&conn_param, 0, sizeof conn_param);
	conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
	conn_param.initiator_depth     = 1;
	conn_param.retry_count	       = 7;
	conn_param.rnr_retry_count     = 6;

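	/* the iSER CM header travels in the CM REQ private data and tells
	 * the target which features (zero-based VA, Send-with-Invalidate)
	 * this initiator does not support */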
	memset(&req_hdr, 0, sizeof(req_hdr));
	req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
			ISER_SEND_W_INV_NOT_SUPPORTED);
	conn_param.private_data		= (void *)&req_hdr;
	conn_param.private_data_len	= sizeof(struct iser_cm_hdr);

	ret = rdma_connect(cma_id, &conn_param);
	if (ret) {
		iser_err("failure connecting: %d\n", ret);
		goto failure;
	}

	return;
failure:
	iser_connect_error(cma_id);
}

static void iser_connected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *ib_conn;
	struct ib_qp_attr attr;
	struct ib_qp_init_attr init_attr;

	ib_conn = (struct iser_conn *)cma_id->context;
	if (ib_conn->state != ISER_CONN_PENDING)
		/* bailout */
		return;

	(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
	iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);

	ib_conn->state = ISER_CONN_UP;
	complete(&ib_conn->up_completion);
}

static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *ib_conn;

	ib_conn = (struct iser_conn *)cma_id->context;

	/* getting here when the state is UP means that the conn is being *
	 * terminated asynchronously from the iSCSI layer's perspective.  */
	if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
					ISER_CONN_TERMINATING)){
		if (ib_conn->iscsi_conn)
			iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED);
		else
			iser_err("iscsi_iser connection isn't bound\n");
	}

	/* Complete the termination process if no posts are pending. This code
	 * block also exists in iser_handle_comp_error(), but it is needed here
	 * for cases of no flushes at all, e.g. discovery over rdma.
	 */
	if (ib_conn->post_recv_buf_count == 0 &&
	    (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
		complete(&ib_conn->flush_completion);
	}
}

static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
	struct iser_conn *ib_conn;

	ib_conn = (struct iser_conn *)cma_id->context;
	iser_info("event %d status %d conn %p id %p\n",
		  event->event, event->status, cma_id->context, cma_id);

	mutex_lock(&ib_conn->state_mutex);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		iser_addr_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		iser_route_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		iser_connected_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_REJECTED:
		iser_connect_error(cma_id);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
	case RDMA_CM_EVENT_ADDR_CHANGE:
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		iser_disconnected_handler(cma_id);
		break;
	default:
		iser_err("Unexpected RDMA CM event (%d)\n", event->event);
		break;
	}
	mutex_unlock(&ib_conn->state_mutex);
	return 0;
}

void iser_conn_init(struct iser_conn *ib_conn)
{
	ib_conn->state = ISER_CONN_INIT;
	ib_conn->post_recv_buf_count = 0;
	atomic_set(&ib_conn->post_send_buf_count, 0);
	init_completion(&ib_conn->stop_completion);
	init_completion(&ib_conn->flush_completion);
	init_completion(&ib_conn->up_completion);
	INIT_LIST_HEAD(&ib_conn->conn_list);
	spin_lock_init(&ib_conn->lock);
	mutex_init(&ib_conn->state_mutex);
}

 /**
 * starts the process of connecting to the target
 * sleeps until the connection is established or rejected
 */
int iser_connect(struct iser_conn   *ib_conn,
		 struct sockaddr    *src_addr,
		 struct sockaddr    *dst_addr,
		 int                 non_blocking)
{
	int err = 0;

	mutex_lock(&ib_conn->state_mutex);

	sprintf(ib_conn->name, "%pISp", dst_addr);

	iser_info("connecting to: %s\n", ib_conn->name);

	/* the device is known only --after-- address resolution */
	ib_conn->device = NULL;

	ib_conn->state = ISER_CONN_PENDING;

	ib_conn->cma_id = rdma_create_id(iser_cma_handler,
					     (void *)ib_conn,
					     RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = PTR_ERR(ib_conn->cma_id);
		iser_err("rdma_create_id failed: %d\n", err);
		goto id_failure;
	}

	err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000);
	if (err) {
		iser_err("rdma_resolve_addr failed: %d\n", err);
		goto addr_failure;
	}

	if (!non_blocking) {
		wait_for_completion_interruptible(&ib_conn->up_completion);

		if (ib_conn->state != ISER_CONN_UP) {
			err =  -EIO;
			goto connect_failure;
		}
	}
	mutex_unlock(&ib_conn->state_mutex);

	mutex_lock(&ig.connlist_mutex);
	list_add(&ib_conn->conn_list, &ig.connlist);
	mutex_unlock(&ig.connlist_mutex);
	return 0;

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	ib_conn->state = ISER_CONN_DOWN;
connect_failure:
	mutex_unlock(&ib_conn->state_mutex);
	iser_conn_release(ib_conn);
	return err;
}

/**
 * iser_reg_page_vec - Register physical memory
 *
 * returns: 0 on success, errno code on failure
 */
int iser_reg_page_vec(struct iser_conn     *ib_conn,
		      struct iser_page_vec *page_vec,
		      struct iser_mem_reg  *mem_reg)
{
	struct ib_pool_fmr *mem;
	u64		   io_addr;
	u64		   *page_list;
	int		   status;

	page_list = page_vec->pages;
	io_addr	  = page_list[0];

	mem  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
				    page_list,
				    page_vec->length,
				    io_addr);

	if (IS_ERR(mem)) {
		status = (int)PTR_ERR(mem);
		iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
		return status;
	}

	mem_reg->lkey  = mem->fmr->lkey;
	mem_reg->rkey  = mem->fmr->rkey;
	mem_reg->len   = page_vec->length * SIZE_4K;
	mem_reg->va    = io_addr;
	mem_reg->is_mr = 1;
	mem_reg->mem_h = (void *)mem;

	mem_reg->va   += page_vec->offset;
	mem_reg->len   = page_vec->data_size;

	iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
		 "entry[0]: (0x%08lx,%ld)] -> "
		 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
		 page_vec, page_vec->length,
		 (unsigned long)page_vec->pages[0],
		 (unsigned long)page_vec->data_size,
		 (unsigned int)mem_reg->lkey, mem_reg->mem_h,
		 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
	return 0;
}

/**
 * Unregister (previously registered using FMR) memory.
 * If memory is non-FMR does nothing.
 */
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
			enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
	int ret;

	if (!reg->is_mr)
		return;

	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);

	ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
	if (ret)
		iser_err("ib_fmr_pool_unmap failed %d\n", ret);

	reg->mem_h = NULL;
}

void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
			    enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
	struct iser_conn *ib_conn = iser_task->ib_conn;
	struct fast_reg_descriptor *desc = reg->mem_h;

	if (!reg->is_mr)
		return;

	reg->mem_h = NULL;
	reg->is_mr = 0;
	spin_lock_bh(&ib_conn->lock);
	list_add_tail(&desc->list, &ib_conn->fastreg.pool);
	spin_unlock_bh(&ib_conn->lock);
}

int iser_post_recvl(struct iser_conn *ib_conn)
{
	struct ib_recv_wr rx_wr, *rx_wr_failed;
	struct ib_sge	  sge;
	int ib_ret;

	sge.addr   = ib_conn->login_resp_dma;
	sge.length = ISER_RX_LOGIN_SIZE;
	sge.lkey   = ib_conn->device->mr->lkey;

	rx_wr.wr_id   = (unsigned long)ib_conn->login_resp_buf;
	rx_wr.sg_list = &sge;
	rx_wr.num_sge = 1;
	rx_wr.next    = NULL;

	ib_conn->post_recv_buf_count++;
	ib_ret	= ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
		ib_conn->post_recv_buf_count--;
	}
	return ib_ret;
}

int iser_post_recvm(struct iser_conn *ib_conn, int count)
{
	struct ib_recv_wr *rx_wr, *rx_wr_failed;
	int i, ib_ret;
	unsigned int my_rx_head = ib_conn->rx_desc_head;
	struct iser_rx_desc *rx_desc;

	for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
		rx_desc		= &ib_conn->rx_descs[my_rx_head];
		rx_wr->wr_id	= (unsigned long)rx_desc;
		rx_wr->sg_list	= &rx_desc->rx_sg;
		rx_wr->num_sge	= 1;
		rx_wr->next	= rx_wr + 1;
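		/* the rx descriptor ring size is a power of two, so the
		 * head wraps around using a mask instead of a modulo */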
		my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask;
	}

	rx_wr--;
	rx_wr->next = NULL; /* mark end of work requests list */

	ib_conn->post_recv_buf_count += count;
	ib_ret	= ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
		ib_conn->post_recv_buf_count -= count;
	} else
		ib_conn->rx_desc_head = my_rx_head;
	return ib_ret;
}


/**
 * iser_post_send - Initiate a Send DTO operation
 *
 * returns 0 on success, -1 on failure
 */
int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
{
	int		  ib_ret;
	struct ib_send_wr send_wr, *send_wr_failed;

	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

	send_wr.next	   = NULL;
	send_wr.wr_id	   = (unsigned long)tx_desc;
	send_wr.sg_list	   = tx_desc->tx_sg;
	send_wr.num_sge	   = tx_desc->num_sge;
	send_wr.opcode	   = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	atomic_inc(&ib_conn->post_send_buf_count);

	ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_send failed, ret:%d\n", ib_ret);
		atomic_dec(&ib_conn->post_send_buf_count);
	}
	return ib_ret;
}

static void iser_handle_comp_error(struct iser_tx_desc *desc,
				struct iser_conn *ib_conn)
{
	if (desc && desc->type == ISCSI_TX_DATAOUT)
		kmem_cache_free(ig.desc_cache, desc);

	if (ib_conn->post_recv_buf_count == 0 &&
	    atomic_read(&ib_conn->post_send_buf_count) == 0) {
		/**
		 * getting here when the state is UP means that the conn is
		 * being terminated asynchronously from the iSCSI layer's
		 * perspective. It is safe to peek at the connection state
		 * since iscsi_conn_failure is allowed to be called twice.
		 **/
		if (ib_conn->state == ISER_CONN_UP)
			iscsi_conn_failure(ib_conn->iscsi_conn,
					   ISCSI_ERR_CONN_FAILED);

		/* no more uncompleted posts to the QP, complete the
		 * termination process without worrying about the disconnect event */
		complete(&ib_conn->flush_completion);
	}
}

static int iser_drain_tx_cq(struct iser_device  *device, int cq_index)
{
	struct ib_cq  *cq = device->tx_cq[cq_index];
	struct ib_wc  wc;
	struct iser_tx_desc *tx_desc;
	struct iser_conn *ib_conn;
	int completed_tx = 0;

	while (ib_poll_cq(cq, 1, &wc) == 1) {
		tx_desc	= (struct iser_tx_desc *) (unsigned long) wc.wr_id;
		ib_conn = wc.qp->qp_context;
		if (wc.status == IB_WC_SUCCESS) {
			if (wc.opcode == IB_WC_SEND)
				iser_snd_completion(tx_desc, ib_conn);
			else
				iser_err("expected opcode %d got %d\n",
					IB_WC_SEND, wc.opcode);
		} else {
			iser_err("tx id %llx status %d vend_err %x\n",
				 wc.wr_id, wc.status, wc.vendor_err);
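			/* fastreg/local-invalidate work requests carry the
			 * reserved ISER_FASTREG_LI_WRID and no tx descriptor,
			 * so they do not affect post_send_buf_count */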
			if (wc.wr_id != ISER_FASTREG_LI_WRID) {
				atomic_dec(&ib_conn->post_send_buf_count);
				iser_handle_comp_error(tx_desc, ib_conn);
			}
		}
		completed_tx++;
	}
	return completed_tx;
}


static void iser_cq_tasklet_fn(unsigned long data)
{
	struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
	struct iser_device  *device = cq_desc->device;
	int cq_index = cq_desc->cq_index;
	struct ib_cq	     *cq = device->rx_cq[cq_index];
	struct ib_wc	     wc;
	struct iser_rx_desc *desc;
	unsigned long	     xfer_len;
	struct iser_conn *ib_conn;
	int completed_tx, completed_rx = 0;

	/* First do tx drain, so in a case where we have rx flushes and a successful
	 * tx completion we will still go through completion error handling.
	 */
	completed_tx = iser_drain_tx_cq(device, cq_index);

	while (ib_poll_cq(cq, 1, &wc) == 1) {
		desc	 = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
		BUG_ON(desc == NULL);
		ib_conn = wc.qp->qp_context;
		if (wc.status == IB_WC_SUCCESS) {
			if (wc.opcode == IB_WC_RECV) {
				xfer_len = (unsigned long)wc.byte_len;
				iser_rcv_completion(desc, xfer_len, ib_conn);
			} else
				iser_err("expected opcode %d got %d\n",
					IB_WC_RECV, wc.opcode);
		} else {
			if (wc.status != IB_WC_WR_FLUSH_ERR)
				iser_err("rx id %llx status %d vend_err %x\n",
					wc.wr_id, wc.status, wc.vendor_err);
			ib_conn->post_recv_buf_count--;
			iser_handle_comp_error(NULL, ib_conn);
		}
		completed_rx++;
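		/* drain the send CQ again every 64 rx completions so a long
		 * rx burst does not delay tx completion processing */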
		if (!(completed_rx & 63))
			completed_tx += iser_drain_tx_cq(device, cq_index);
	}
	/* #warning "it is assumed here that arming CQ only once it is empty" *
	 * " would not cause interrupts to be missed"                       */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

	iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
}

static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
	struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context;
	struct iser_device  *device = cq_desc->device;
	int cq_index = cq_desc->cq_index;

	tasklet_schedule(&device->cq_tasklet[cq_index]);
}

u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
			     enum iser_data_dir cmd_dir, sector_t *sector)
{
	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
	struct fast_reg_descriptor *desc = reg->mem_h;
	unsigned long sector_size = iser_task->sc->device->sector_size;
	struct ib_mr_status mr_status;
	int ret;

	if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) {
		desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
		ret = ib_check_mr_status(desc->pi_ctx->sig_mr,
					 IB_MR_CHECK_SIG_STATUS, &mr_status);
		if (ret) {
			pr_err("ib_check_mr_status failed, ret %d\n", ret);
			goto err;
		}

		if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
			sector_t sector_off = mr_status.sig_err.sig_err_offset;

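			/* each sector carries an extra 8-byte DIF tuple on the
			 * wire, so divide the byte offset of the error by
			 * (sector_size + 8) to get the failing sector */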
			do_div(sector_off, sector_size + 8);
			*sector = scsi_get_lba(iser_task->sc) + sector_off;

			pr_err("PI error found type %d at sector %llx "
			       "expected %x vs actual %x\n",
			       mr_status.sig_err.err_type,
			       (unsigned long long)*sector,
			       mr_status.sig_err.expected,
			       mr_status.sig_err.actual);

			switch (mr_status.sig_err.err_type) {
			case IB_SIG_BAD_GUARD:
				return 0x1;
			case IB_SIG_BAD_REFTAG:
				return 0x3;
			case IB_SIG_BAD_APPTAG:
				return 0x2;
			}
		}
	}

	return 0;
err:
	/* Not a lot we can do here, return ambiguous guard error */
	return 0x1;
}