iser_initiator.c 15.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/kfifo.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>

#include "iscsi_iser.h"

/* Register user buffer memory and initialize passive rdma
 *  dto descriptor. Total data size is stored in
44
 *  iser_task->data[ISER_DIR_IN].data_len
45
 */
46
static int iser_prepare_read_cmd(struct iscsi_task *task,
47 48 49
				 unsigned int edtl)

{
50
	struct iscsi_iser_task *iser_task = task->dd_data;
51 52
	struct iser_regd_buf *regd_buf;
	int err;
53 54
	struct iser_hdr *hdr = &iser_task->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
55

56
	err = iser_dma_map_task_data(iser_task,
57 58 59 60 61 62
				     buf_in,
				     ISER_DIR_IN,
				     DMA_FROM_DEVICE);
	if (err)
		return err;

63
	if (edtl > iser_task->data[ISER_DIR_IN].data_len) {
64 65
		iser_err("Total data length: %ld, less than EDTL: "
			 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
66 67
			 iser_task->data[ISER_DIR_IN].data_len, edtl,
			 task->itt, iser_task->iser_conn);
68 69 70
		return -EINVAL;
	}

71
	err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN);
72 73 74 75
	if (err) {
		iser_err("Failed to set up Data-IN RDMA\n");
		return err;
	}
76
	regd_buf = &iser_task->rdma_regd[ISER_DIR_IN];
77 78 79 80 81 82

	hdr->flags    |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey);
	hdr->read_va   = cpu_to_be64(regd_buf->reg.va);

	iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
83
		 task->itt, regd_buf->reg.rkey,
84 85 86 87 88 89 90
		 (unsigned long long)regd_buf->reg.va);

	return 0;
}

/* Register user buffer memory and initialize passive rdma
 *  dto descriptor. Total data size is stored in
91
 *  task->data[ISER_DIR_OUT].data_len
92 93
 */
static int
94
iser_prepare_write_cmd(struct iscsi_task *task,
95 96 97 98
		       unsigned int imm_sz,
		       unsigned int unsol_sz,
		       unsigned int edtl)
{
99
	struct iscsi_iser_task *iser_task = task->dd_data;
100 101
	struct iser_regd_buf *regd_buf;
	int err;
102 103
	struct iser_hdr *hdr = &iser_task->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
104
	struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
105

106
	err = iser_dma_map_task_data(iser_task,
107 108 109 110 111 112
				     buf_out,
				     ISER_DIR_OUT,
				     DMA_TO_DEVICE);
	if (err)
		return err;

113
	if (edtl > iser_task->data[ISER_DIR_OUT].data_len) {
114 115
		iser_err("Total data length: %ld, less than EDTL: %d, "
			 "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
116 117
			 iser_task->data[ISER_DIR_OUT].data_len,
			 edtl, task->itt, task->conn);
118 119 120
		return -EINVAL;
	}

121
	err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT);
122 123 124 125 126
	if (err != 0) {
		iser_err("Failed to register write cmd RDMA mem\n");
		return err;
	}

127
	regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
128 129 130 131 132 133 134 135

	if (unsol_sz < edtl) {
		hdr->flags     |= ISER_WSV;
		hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey);
		hdr->write_va   = cpu_to_be64(regd_buf->reg.va + unsol_sz);

		iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
			 "VA:%#llX + unsol:%d\n",
136
			 task->itt, regd_buf->reg.rkey,
137 138 139 140 141
			 (unsigned long long)regd_buf->reg.va, unsol_sz);
	}

	if (imm_sz > 0) {
		iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
142
			 task->itt, imm_sz);
143 144 145 146
		tx_dsg->addr   = regd_buf->reg.va;
		tx_dsg->length = imm_sz;
		tx_dsg->lkey   = regd_buf->reg.lkey;
		iser_task->desc.num_sge = 2;
147 148 149 150 151 152
	}

	return 0;
}

/* creates a new tx descriptor and adds header regd buffer */
153 154
static void iser_create_send_desc(struct iser_conn	*ib_conn,
				  struct iser_tx_desc	*tx_desc)
155
{
156
	struct iser_device *device = ib_conn->device;
157

158 159
	ib_dma_sync_single_for_cpu(device->ib_device,
		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
160 161 162 163

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;

164 165 166 167 168 169
	tx_desc->num_sge = 1;

	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
		tx_desc->tx_sg[0].lkey = device->mr->lkey;
		iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc);
	}
170 171
}

172

173
static int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238
{
	int i, j;
	u64 dma_addr;
	struct iser_rx_desc *rx_desc;
	struct ib_sge       *rx_sg;
	struct iser_device  *device = ib_conn->device;

	ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
				sizeof(struct iser_rx_desc), GFP_KERNEL);
	if (!ib_conn->rx_descs)
		goto rx_desc_alloc_fail;

	rx_desc = ib_conn->rx_descs;

	for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)  {
		dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
					ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, dma_addr))
			goto rx_desc_dma_map_failed;

		rx_desc->dma_addr = dma_addr;

		rx_sg = &rx_desc->rx_sg;
		rx_sg->addr   = rx_desc->dma_addr;
		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
		rx_sg->lkey   = device->mr->lkey;
	}

	ib_conn->rx_desc_head = 0;
	return 0;

rx_desc_dma_map_failed:
	rx_desc = ib_conn->rx_descs;
	for (j = 0; j < i; j++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	kfree(ib_conn->rx_descs);
	ib_conn->rx_descs = NULL;
rx_desc_alloc_fail:
	iser_err("failed allocating rx descriptors / data buffers\n");
	return -ENOMEM;
}

void iser_free_rx_descriptors(struct iser_conn *ib_conn)
{
	int i;
	struct iser_rx_desc *rx_desc;
	struct iser_device *device = ib_conn->device;

	if (ib_conn->login_buf) {
		ib_dma_unmap_single(device->ib_device, ib_conn->login_dma,
			ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
		kfree(ib_conn->login_buf);
	}

	if (!ib_conn->rx_descs)
		return;

	rx_desc = ib_conn->rx_descs;
	for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	kfree(ib_conn->rx_descs);
}

239 240 241 242 243 244 245
/**
 *  iser_conn_set_full_featured_mode - (iSER API)
 */
int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;

246
	iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
247 248 249

	/* Check that there is no posted recv or send buffers left - */
	/* they must be consumed during the login phase */
250
	BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0);
251 252
	BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);

253 254 255
	if (iser_alloc_rx_descriptors(iser_conn->ib_conn))
		return -ENOMEM;

256
	/* Initial post receive buffers */
257 258 259
	if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
		return -ENOMEM;

260 261 262 263 264 265
	return 0;
}

/**
 * iser_send_command - send command PDU
 */
266
int iser_send_command(struct iscsi_conn *conn,
267
		      struct iscsi_task *task)
268 269
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
270
	struct iscsi_iser_task *iser_task = task->dd_data;
271
	unsigned long edtl;
272
	int err;
273
	struct iser_data_buf *data_buf;
274
	struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
275
	struct scsi_cmnd *sc  =  task->sc;
276
	struct iser_tx_desc *tx_desc = &iser_task->desc;
277 278 279 280

	edtl = ntohl(hdr->data_length);

	/* build the tx desc regd header and add it to the tx desc dto */
281 282
	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
	iser_create_send_desc(iser_conn->ib_conn, tx_desc);
283 284

	if (hdr->flags & ISCSI_FLAG_CMD_READ)
285
		data_buf = &iser_task->data[ISER_DIR_IN];
286
	else
287
		data_buf = &iser_task->data[ISER_DIR_OUT];
288

289 290 291
	if (scsi_sg_count(sc)) { /* using a scatter list */
		data_buf->buf  = scsi_sglist(sc);
		data_buf->size = scsi_sg_count(sc);
292 293
	}

294
	data_buf->data_len = scsi_bufflen(sc);
295 296

	if (hdr->flags & ISCSI_FLAG_CMD_READ) {
297
		err = iser_prepare_read_cmd(task, edtl);
298 299 300 301
		if (err)
			goto send_command_error;
	}
	if (hdr->flags & ISCSI_FLAG_CMD_WRITE) {
302 303 304
		err = iser_prepare_write_cmd(task,
					     task->imm_count,
				             task->imm_count +
305
					     task->unsol_r2t.data_length,
306 307 308 309 310
					     edtl);
		if (err)
			goto send_command_error;
	}

311
	iser_task->status = ISER_TASK_STATUS_STARTED;
312

313
	err = iser_post_send(iser_conn->ib_conn, tx_desc);
314 315 316 317
	if (!err)
		return 0;

send_command_error:
318
	iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err);
319 320 321 322 323 324
	return err;
}

/**
 * iser_send_data_out - send data out PDU
 */
325
int iser_send_data_out(struct iscsi_conn *conn,
326
		       struct iscsi_task *task,
327 328 329
		       struct iscsi_data *hdr)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
330
	struct iscsi_iser_task *iser_task = task->dd_data;
331 332
	struct iser_tx_desc *tx_desc = NULL;
	struct iser_regd_buf *regd_buf;
333 334
	unsigned long buf_offset;
	unsigned long data_seg_len;
335
	uint32_t itt;
336
	int err = 0;
337 338
	struct ib_sge *tx_dsg;

339
	itt = (__force uint32_t)hdr->itt;
340 341 342 343 344 345
	data_seg_len = ntoh24(hdr->dlength);
	buf_offset   = ntohl(hdr->offset);

	iser_dbg("%s itt %d dseg_len %d offset %d\n",
		 __func__,(int)itt,(int)data_seg_len,(int)buf_offset);

O
Or Gerlitz 已提交
346
	tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
347 348 349 350 351 352
	if (tx_desc == NULL) {
		iser_err("Failed to alloc desc for post dataout\n");
		return -ENOMEM;
	}

	tx_desc->type = ISCSI_TX_DATAOUT;
353
	tx_desc->iser_header.flags = ISER_VER;
354 355
	memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));

356 357
	/* build the tx desc */
	iser_initialize_task_headers(task, tx_desc);
358

359 360 361 362 363 364
	regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
	tx_dsg = &tx_desc->tx_sg[1];
	tx_dsg->addr    = regd_buf->reg.va + buf_offset;
	tx_dsg->length  = data_seg_len;
	tx_dsg->lkey    = regd_buf->reg.lkey;
	tx_desc->num_sge = 2;
365

366
	if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
367 368 369
		iser_err("Offset:%ld & DSL:%ld in Data-Out "
			 "inconsistent with total len:%ld, itt:%d\n",
			 buf_offset, data_seg_len,
370
			 iser_task->data[ISER_DIR_OUT].data_len, itt);
371 372 373 374 375 376 377
		err = -EINVAL;
		goto send_data_out_error;
	}
	iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
		 itt, buf_offset, data_seg_len);


378
	err = iser_post_send(iser_conn->ib_conn, tx_desc);
379 380 381 382 383 384 385 386 387 388
	if (!err)
		return 0;

send_data_out_error:
	kmem_cache_free(ig.desc_cache, tx_desc);
	iser_err("conn %p failed err %d\n",conn, err);
	return err;
}

int iser_send_control(struct iscsi_conn *conn,
389
		      struct iscsi_task *task)
390 391
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
392
	struct iscsi_iser_task *iser_task = task->dd_data;
393
	struct iser_tx_desc *mdesc = &iser_task->desc;
394
	unsigned long data_seg_len;
395
	int err = 0;
396 397 398 399
	struct iser_device *device;

	/* build the tx desc regd header and add it to the tx desc dto */
	mdesc->type = ISCSI_TX_CONTROL;
400
	iser_create_send_desc(iser_conn->ib_conn, mdesc);
401 402 403

	device = iser_conn->ib_conn->device;

404
	data_seg_len = ntoh24(task->hdr->dlength);
405 406

	if (data_seg_len > 0) {
407 408 409 410 411 412 413 414
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
		if (task != conn->login_task) {
			iser_err("data present on non login task!!!\n");
			goto send_control_error;
		}
		memcpy(iser_conn->ib_conn->login_buf, task->data,
							task->data_count);
		tx_dsg->addr    = iser_conn->ib_conn->login_dma;
O
Or Gerlitz 已提交
415
		tx_dsg->length  = task->data_count;
416 417
		tx_dsg->lkey    = device->mr->lkey;
		mdesc->num_sge = 2;
418 419
	}

420 421 422 423
	if (task == conn->login_task) {
		err = iser_post_recvl(iser_conn->ib_conn);
		if (err)
			goto send_control_error;
424 425
	}

426
	err = iser_post_send(iser_conn->ib_conn, mdesc);
427 428 429 430 431 432 433 434 435 436 437
	if (!err)
		return 0;

send_control_error:
	iser_err("conn %p failed err %d\n",conn, err);
	return err;
}

/**
 * iser_rcv_dto_completion - recv DTO completion
 */
438 439 440
void iser_rcv_completion(struct iser_rx_desc *rx_desc,
			 unsigned long rx_xfer_len,
			 struct iser_conn *ib_conn)
441
{
442
	struct iscsi_iser_conn *conn = ib_conn->iser_conn;
443
	struct iscsi_hdr *hdr;
444 445 446 447 448 449 450 451 452 453 454
	u64 rx_dma;
	int rx_buflen, outstanding, count, err;

	/* differentiate between login to all other PDUs */
	if ((char *)rx_desc == ib_conn->login_buf) {
		rx_dma = ib_conn->login_dma;
		rx_buflen = ISER_RX_LOGIN_SIZE;
	} else {
		rx_dma = rx_desc->dma_addr;
		rx_buflen = ISER_RX_PAYLOAD_SIZE;
	}
455

456 457
	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
			rx_buflen, DMA_FROM_DEVICE);
458

459
	hdr = &rx_desc->iscsi_header;
460

461 462
	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
			hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
463

464 465
	iscsi_iser_recv(conn->iscsi_conn, hdr,
		rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);
466

467 468
	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
			rx_buflen, DMA_FROM_DEVICE);
469 470 471 472 473

	/* decrementing conn->post_recv_buf_count only --after-- freeing the   *
	 * task eliminates the need to worry on tasks which are completed in   *
	 * parallel to the execution of iser_conn_term. So the code that waits *
	 * for the posted rx bufs refcount to become zero handles everything   */
474
	conn->ib_conn->post_recv_buf_count--;
475 476 477 478

	if (rx_dma == ib_conn->login_dma)
		return;

479
	outstanding = ib_conn->post_recv_buf_count;
480 481 482 483 484 485 486
	if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
		count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
						ISER_MIN_POSTED_RX);
		err = iser_post_recvm(ib_conn, count);
		if (err)
			iser_err("posting %d rx bufs err %d\n", count, err);
	}
487 488
}

489 490
void iser_snd_completion(struct iser_tx_desc *tx_desc,
			struct iser_conn *ib_conn)
491
{
492
	struct iscsi_task *task;
493
	struct iser_device *device = ib_conn->device;
494

495 496 497
	if (tx_desc->type == ISCSI_TX_DATAOUT) {
		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
					ISER_HEADERS_LEN, DMA_TO_DEVICE);
498
		kmem_cache_free(ig.desc_cache, tx_desc);
499
	}
500

501
	atomic_dec(&ib_conn->post_send_buf_count);
502 503 504

	if (tx_desc->type == ISCSI_TX_CONTROL) {
		/* this arithmetic is legal by libiscsi dd_data allocation */
505 506 507 508
		task = (void *) ((long)(void *)tx_desc -
				  sizeof(struct iscsi_task));
		if (task->hdr->itt == RESERVED_ITT)
			iscsi_put_task(task);
509 510 511
	}
}

512
void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
513 514

{
515
	iser_task->status = ISER_TASK_STATUS_INIT;
516

517 518
	iser_task->dir[ISER_DIR_IN] = 0;
	iser_task->dir[ISER_DIR_OUT] = 0;
519

520 521
	iser_task->data[ISER_DIR_IN].data_len  = 0;
	iser_task->data[ISER_DIR_OUT].data_len = 0;
522

523
	memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,
524
	       sizeof(struct iser_regd_buf));
525
	memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0,
526 527 528
	       sizeof(struct iser_regd_buf));
}

529
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
530
{
531
	int is_rdma_aligned = 1;
532
	struct iser_regd_buf *regd;
533 534 535 536

	/* if we were reading, copy back to unaligned sglist,
	 * anyway dma_unmap and free the copy
	 */
537
	if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) {
538
		is_rdma_aligned = 0;
539
		iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN);
540
	}
541
	if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
542
		is_rdma_aligned = 0;
543
		iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT);
544
	}
545

546 547
	if (iser_task->dir[ISER_DIR_IN]) {
		regd = &iser_task->rdma_regd[ISER_DIR_IN];
548 549
		if (regd->reg.is_fmr)
			iser_unreg_mem(&regd->reg);
550 551
	}

552 553
	if (iser_task->dir[ISER_DIR_OUT]) {
		regd = &iser_task->rdma_regd[ISER_DIR_OUT];
554 555
		if (regd->reg.is_fmr)
			iser_unreg_mem(&regd->reg);
556 557
	}

558 559
       /* if the data was unaligned, it was already unmapped and then copied */
       if (is_rdma_aligned)
560
		iser_dma_unmap_task_data(iser_task);
561
}