/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/kfifo.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>

#include "iscsi_iser.h"

/* Register user buffer memory and initialize passive rdma
 *  dto descriptor. Total data size is stored in
44
 *  iser_task->data[ISER_DIR_IN].data_len
45
 */
46
static int iser_prepare_read_cmd(struct iscsi_task *task,
47 48 49
				 unsigned int edtl)

{
50
	struct iscsi_iser_task *iser_task = task->dd_data;
51 52
	struct iser_regd_buf *regd_buf;
	int err;
53 54
	struct iser_hdr *hdr = &iser_task->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
55

56
	err = iser_dma_map_task_data(iser_task,
57 58 59 60 61 62
				     buf_in,
				     ISER_DIR_IN,
				     DMA_FROM_DEVICE);
	if (err)
		return err;

63
	if (edtl > iser_task->data[ISER_DIR_IN].data_len) {
64 65
		iser_err("Total data length: %ld, less than EDTL: "
			 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
66 67
			 iser_task->data[ISER_DIR_IN].data_len, edtl,
			 task->itt, iser_task->iser_conn);
68 69 70
		return -EINVAL;
	}

71
	err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN);
72 73 74 75
	if (err) {
		iser_err("Failed to set up Data-IN RDMA\n");
		return err;
	}
76
	regd_buf = &iser_task->rdma_regd[ISER_DIR_IN];
77 78 79 80 81 82

	hdr->flags    |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey);
	hdr->read_va   = cpu_to_be64(regd_buf->reg.va);

	iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
83
		 task->itt, regd_buf->reg.rkey,
84 85 86 87 88 89 90
		 (unsigned long long)regd_buf->reg.va);

	return 0;
}

/* Register user buffer memory and initialize passive rdma
 *  dto descriptor. Total data size is stored in
91
 *  task->data[ISER_DIR_OUT].data_len
92 93
 */
static int
94
iser_prepare_write_cmd(struct iscsi_task *task,
95 96 97 98
		       unsigned int imm_sz,
		       unsigned int unsol_sz,
		       unsigned int edtl)
{
99
	struct iscsi_iser_task *iser_task = task->dd_data;
100 101
	struct iser_regd_buf *regd_buf;
	int err;
102 103
	struct iser_hdr *hdr = &iser_task->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
104
	struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
105

106
	err = iser_dma_map_task_data(iser_task,
107 108 109 110 111 112
				     buf_out,
				     ISER_DIR_OUT,
				     DMA_TO_DEVICE);
	if (err)
		return err;

113
	if (edtl > iser_task->data[ISER_DIR_OUT].data_len) {
114 115
		iser_err("Total data length: %ld, less than EDTL: %d, "
			 "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
116 117
			 iser_task->data[ISER_DIR_OUT].data_len,
			 edtl, task->itt, task->conn);
118 119 120
		return -EINVAL;
	}

121
	err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT);
122 123 124 125 126
	if (err != 0) {
		iser_err("Failed to register write cmd RDMA mem\n");
		return err;
	}

127
	regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
128 129 130 131 132 133 134 135

	if (unsol_sz < edtl) {
		hdr->flags     |= ISER_WSV;
		hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey);
		hdr->write_va   = cpu_to_be64(regd_buf->reg.va + unsol_sz);

		iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
			 "VA:%#llX + unsol:%d\n",
136
			 task->itt, regd_buf->reg.rkey,
137 138 139 140 141
			 (unsigned long long)regd_buf->reg.va, unsol_sz);
	}

	if (imm_sz > 0) {
		iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
142
			 task->itt, imm_sz);
143 144 145 146
		tx_dsg->addr   = regd_buf->reg.va;
		tx_dsg->length = imm_sz;
		tx_dsg->lkey   = regd_buf->reg.lkey;
		iser_task->desc.num_sge = 2;
147 148 149 150 151 152
	}

	return 0;
}

/* creates a new tx descriptor and adds header regd buffer */
153 154
static void iser_create_send_desc(struct iser_conn	*ib_conn,
				  struct iser_tx_desc	*tx_desc)
155
{
156
	struct iser_device *device = ib_conn->device;
157

158 159
	ib_dma_sync_single_for_cpu(device->ib_device,
		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
160 161 162 163

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;

164 165 166 167 168 169
	tx_desc->num_sge = 1;

	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
		tx_desc->tx_sg[0].lkey = device->mr->lkey;
		iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc);
	}
170 171
}

172

173
static int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
{
	int i, j;
	u64 dma_addr;
	struct iser_rx_desc *rx_desc;
	struct ib_sge       *rx_sg;
	struct iser_device  *device = ib_conn->device;

	ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
				sizeof(struct iser_rx_desc), GFP_KERNEL);
	if (!ib_conn->rx_descs)
		goto rx_desc_alloc_fail;

	rx_desc = ib_conn->rx_descs;

	for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)  {
		dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
					ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, dma_addr))
			goto rx_desc_dma_map_failed;

		rx_desc->dma_addr = dma_addr;

		rx_sg = &rx_desc->rx_sg;
		rx_sg->addr   = rx_desc->dma_addr;
		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
		rx_sg->lkey   = device->mr->lkey;
	}

	ib_conn->rx_desc_head = 0;
	return 0;

rx_desc_dma_map_failed:
	rx_desc = ib_conn->rx_descs;
	for (j = 0; j < i; j++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	kfree(ib_conn->rx_descs);
	ib_conn->rx_descs = NULL;
rx_desc_alloc_fail:
	iser_err("failed allocating rx descriptors / data buffers\n");
	return -ENOMEM;
}

void iser_free_rx_descriptors(struct iser_conn *ib_conn)
{
	int i;
	struct iser_rx_desc *rx_desc;
	struct iser_device *device = ib_conn->device;

	if (!ib_conn->rx_descs)
		return;

	rx_desc = ib_conn->rx_descs;
	for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	kfree(ib_conn->rx_descs);
}

233 234 235 236 237 238 239
/**
 *  iser_conn_set_full_featured_mode - (iSER API)
 */
int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;

240
	iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
241 242 243

	/* Check that there is no posted recv or send buffers left - */
	/* they must be consumed during the login phase */
244
	BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0);
245 246
	BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);

247 248 249
	if (iser_alloc_rx_descriptors(iser_conn->ib_conn))
		return -ENOMEM;

250
	/* Initial post receive buffers */
251 252 253
	if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
		return -ENOMEM;

254 255 256 257 258 259
	return 0;
}

/**
 * iser_send_command - send command PDU
 */
260
int iser_send_command(struct iscsi_conn *conn,
261
		      struct iscsi_task *task)
262 263
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
264
	struct iscsi_iser_task *iser_task = task->dd_data;
265
	unsigned long edtl;
266
	int err;
267
	struct iser_data_buf *data_buf;
268
	struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
269
	struct scsi_cmnd *sc  =  task->sc;
270
	struct iser_tx_desc *tx_desc = &iser_task->desc;
271 272 273 274

	edtl = ntohl(hdr->data_length);

	/* build the tx desc regd header and add it to the tx desc dto */
275 276
	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
	iser_create_send_desc(iser_conn->ib_conn, tx_desc);
277 278

	if (hdr->flags & ISCSI_FLAG_CMD_READ)
279
		data_buf = &iser_task->data[ISER_DIR_IN];
280
	else
281
		data_buf = &iser_task->data[ISER_DIR_OUT];
282

283 284 285
	if (scsi_sg_count(sc)) { /* using a scatter list */
		data_buf->buf  = scsi_sglist(sc);
		data_buf->size = scsi_sg_count(sc);
286 287
	}

288
	data_buf->data_len = scsi_bufflen(sc);
289 290

	if (hdr->flags & ISCSI_FLAG_CMD_READ) {
291
		err = iser_prepare_read_cmd(task, edtl);
292 293 294 295
		if (err)
			goto send_command_error;
	}
	if (hdr->flags & ISCSI_FLAG_CMD_WRITE) {
296 297 298
		err = iser_prepare_write_cmd(task,
					     task->imm_count,
				             task->imm_count +
299
					     task->unsol_r2t.data_length,
300 301 302 303 304
					     edtl);
		if (err)
			goto send_command_error;
	}

305
	iser_task->status = ISER_TASK_STATUS_STARTED;
306

307
	err = iser_post_send(iser_conn->ib_conn, tx_desc);
308 309 310 311
	if (!err)
		return 0;

send_command_error:
312
	iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err);
313 314 315 316 317 318
	return err;
}

/**
 * iser_send_data_out - send data out PDU
 */
319
int iser_send_data_out(struct iscsi_conn *conn,
320
		       struct iscsi_task *task,
321 322 323
		       struct iscsi_data *hdr)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
324
	struct iscsi_iser_task *iser_task = task->dd_data;
325 326
	struct iser_tx_desc *tx_desc = NULL;
	struct iser_regd_buf *regd_buf;
327 328
	unsigned long buf_offset;
	unsigned long data_seg_len;
329
	uint32_t itt;
330
	int err = 0;
331 332
	struct ib_sge *tx_dsg;

333
	itt = (__force uint32_t)hdr->itt;
334 335 336 337 338 339
	data_seg_len = ntoh24(hdr->dlength);
	buf_offset   = ntohl(hdr->offset);

	iser_dbg("%s itt %d dseg_len %d offset %d\n",
		 __func__,(int)itt,(int)data_seg_len,(int)buf_offset);

O
Or Gerlitz 已提交
340
	tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
341 342 343 344 345 346
	if (tx_desc == NULL) {
		iser_err("Failed to alloc desc for post dataout\n");
		return -ENOMEM;
	}

	tx_desc->type = ISCSI_TX_DATAOUT;
347
	tx_desc->iser_header.flags = ISER_VER;
348 349
	memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));

350 351
	/* build the tx desc */
	iser_initialize_task_headers(task, tx_desc);
352

353 354 355 356 357 358
	regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
	tx_dsg = &tx_desc->tx_sg[1];
	tx_dsg->addr    = regd_buf->reg.va + buf_offset;
	tx_dsg->length  = data_seg_len;
	tx_dsg->lkey    = regd_buf->reg.lkey;
	tx_desc->num_sge = 2;
359

360
	if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
361 362 363
		iser_err("Offset:%ld & DSL:%ld in Data-Out "
			 "inconsistent with total len:%ld, itt:%d\n",
			 buf_offset, data_seg_len,
364
			 iser_task->data[ISER_DIR_OUT].data_len, itt);
365 366 367 368 369 370 371
		err = -EINVAL;
		goto send_data_out_error;
	}
	iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
		 itt, buf_offset, data_seg_len);


372
	err = iser_post_send(iser_conn->ib_conn, tx_desc);
373 374 375 376 377 378 379 380 381 382
	if (!err)
		return 0;

send_data_out_error:
	kmem_cache_free(ig.desc_cache, tx_desc);
	iser_err("conn %p failed err %d\n",conn, err);
	return err;
}

int iser_send_control(struct iscsi_conn *conn,
383
		      struct iscsi_task *task)
384 385
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
386
	struct iscsi_iser_task *iser_task = task->dd_data;
387
	struct iser_tx_desc *mdesc = &iser_task->desc;
388
	unsigned long data_seg_len;
389
	int err = 0;
390
	struct iser_device *device;
391
	struct iser_conn *ib_conn = iser_conn->ib_conn;
392 393 394

	/* build the tx desc regd header and add it to the tx desc dto */
	mdesc->type = ISCSI_TX_CONTROL;
395
	iser_create_send_desc(iser_conn->ib_conn, mdesc);
396 397 398

	device = iser_conn->ib_conn->device;

399
	data_seg_len = ntoh24(task->hdr->dlength);
400 401

	if (data_seg_len > 0) {
402 403 404 405 406
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
		if (task != conn->login_task) {
			iser_err("data present on non login task!!!\n");
			goto send_control_error;
		}
407 408 409 410 411 412

		ib_dma_sync_single_for_cpu(device->ib_device,
			ib_conn->login_req_dma, task->data_count,
			DMA_TO_DEVICE);

		memcpy(iser_conn->ib_conn->login_req_buf, task->data,
413
							task->data_count);
414 415 416 417 418 419

		ib_dma_sync_single_for_device(device->ib_device,
			ib_conn->login_req_dma, task->data_count,
			DMA_TO_DEVICE);

		tx_dsg->addr    = iser_conn->ib_conn->login_req_dma;
O
Or Gerlitz 已提交
420
		tx_dsg->length  = task->data_count;
421 422
		tx_dsg->lkey    = device->mr->lkey;
		mdesc->num_sge = 2;
423 424
	}

425 426 427 428
	if (task == conn->login_task) {
		err = iser_post_recvl(iser_conn->ib_conn);
		if (err)
			goto send_control_error;
429 430
	}

431
	err = iser_post_send(iser_conn->ib_conn, mdesc);
432 433 434 435 436 437 438 439 440 441 442
	if (!err)
		return 0;

send_control_error:
	iser_err("conn %p failed err %d\n",conn, err);
	return err;
}

/**
 * iser_rcv_dto_completion - recv DTO completion
 */
443 444 445
void iser_rcv_completion(struct iser_rx_desc *rx_desc,
			 unsigned long rx_xfer_len,
			 struct iser_conn *ib_conn)
446
{
447
	struct iscsi_iser_conn *conn = ib_conn->iser_conn;
448
	struct iscsi_hdr *hdr;
449 450 451 452
	u64 rx_dma;
	int rx_buflen, outstanding, count, err;

	/* differentiate between login to all other PDUs */
453 454
	if ((char *)rx_desc == ib_conn->login_resp_buf) {
		rx_dma = ib_conn->login_resp_dma;
455 456 457 458 459
		rx_buflen = ISER_RX_LOGIN_SIZE;
	} else {
		rx_dma = rx_desc->dma_addr;
		rx_buflen = ISER_RX_PAYLOAD_SIZE;
	}
460

461 462
	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
			rx_buflen, DMA_FROM_DEVICE);
463

464
	hdr = &rx_desc->iscsi_header;
465

466 467
	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
			hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
468

469 470
	iscsi_iser_recv(conn->iscsi_conn, hdr,
		rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);
471

472 473
	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
			rx_buflen, DMA_FROM_DEVICE);
474 475 476 477 478

	/* decrementing conn->post_recv_buf_count only --after-- freeing the   *
	 * task eliminates the need to worry on tasks which are completed in   *
	 * parallel to the execution of iser_conn_term. So the code that waits *
	 * for the posted rx bufs refcount to become zero handles everything   */
479
	conn->ib_conn->post_recv_buf_count--;
480

481
	if (rx_dma == ib_conn->login_resp_dma)
482 483
		return;

484
	outstanding = ib_conn->post_recv_buf_count;
485 486 487 488 489 490 491
	if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
		count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
						ISER_MIN_POSTED_RX);
		err = iser_post_recvm(ib_conn, count);
		if (err)
			iser_err("posting %d rx bufs err %d\n", count, err);
	}
492 493
}

494 495
void iser_snd_completion(struct iser_tx_desc *tx_desc,
			struct iser_conn *ib_conn)
496
{
497
	struct iscsi_task *task;
498
	struct iser_device *device = ib_conn->device;
499

500 501 502
	if (tx_desc->type == ISCSI_TX_DATAOUT) {
		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
					ISER_HEADERS_LEN, DMA_TO_DEVICE);
503
		kmem_cache_free(ig.desc_cache, tx_desc);
504
	}
505

506
	atomic_dec(&ib_conn->post_send_buf_count);
507 508 509

	if (tx_desc->type == ISCSI_TX_CONTROL) {
		/* this arithmetic is legal by libiscsi dd_data allocation */
510 511 512 513
		task = (void *) ((long)(void *)tx_desc -
				  sizeof(struct iscsi_task));
		if (task->hdr->itt == RESERVED_ITT)
			iscsi_put_task(task);
514 515 516
	}
}

517
void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
518 519

{
520
	iser_task->status = ISER_TASK_STATUS_INIT;
521

522 523
	iser_task->dir[ISER_DIR_IN] = 0;
	iser_task->dir[ISER_DIR_OUT] = 0;
524

525 526
	iser_task->data[ISER_DIR_IN].data_len  = 0;
	iser_task->data[ISER_DIR_OUT].data_len = 0;
527

528
	memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,
529
	       sizeof(struct iser_regd_buf));
530
	memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0,
531 532 533
	       sizeof(struct iser_regd_buf));
}

534
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
535
{
536
	int is_rdma_aligned = 1;
537
	struct iser_regd_buf *regd;
538 539 540 541

	/* if we were reading, copy back to unaligned sglist,
	 * anyway dma_unmap and free the copy
	 */
542
	if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) {
543
		is_rdma_aligned = 0;
544
		iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN);
545
	}
546
	if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
547
		is_rdma_aligned = 0;
548
		iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT);
549
	}
550

551 552
	if (iser_task->dir[ISER_DIR_IN]) {
		regd = &iser_task->rdma_regd[ISER_DIR_IN];
553 554
		if (regd->reg.is_fmr)
			iser_unreg_mem(&regd->reg);
555 556
	}

557 558
	if (iser_task->dir[ISER_DIR_OUT]) {
		regd = &iser_task->rdma_regd[ISER_DIR_OUT];
559 560
		if (regd->reg.is_fmr)
			iser_unreg_mem(&regd->reg);
561 562
	}

563 564
       /* if the data was unaligned, it was already unmapped and then copied */
       if (is_rdma_aligned)
565
		iser_dma_unmap_task_data(iser_task);
566
}