/*
 * iSCSI Initiator over TCP/IP Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
 * Copyright (C) 2005 - 2006 Mike Christie
 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
 * maintained by open-iscsi@googlegroups.com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * See the file COPYING included with this distribution for more details.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 */

#include <linux/types.h>
#include <linux/list.h>
#include <linux/inet.h>
#include <linux/file.h>
#include <linux/blkdev.h>
#include <linux/crypto.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
#include <net/tcp.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include "iscsi_tcp.h"

MODULE_AUTHOR("Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");
MODULE_DESCRIPTION("iSCSI/TCP data-path");
MODULE_LICENSE("GPL");
#undef DEBUG_TCP
#define DEBUG_ASSERT

#ifdef DEBUG_TCP
#define debug_tcp(fmt...) printk(KERN_INFO "tcp: " fmt)
#else
#define debug_tcp(fmt...)
#endif

#ifndef DEBUG_ASSERT
#ifdef BUG_ON
#undef BUG_ON
#endif
#define BUG_ON(expr)
#endif

static struct scsi_transport_template *iscsi_tcp_scsi_transport;
static struct scsi_host_template iscsi_sht;
static struct iscsi_transport iscsi_tcp_transport;

static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

static int iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
				   struct iscsi_segment *segment);
/*
 * Scatterlist handling: inside the iscsi_segment, we
 * remember an index into the scatterlist, and set data/size
 * to the current scatterlist entry. For highmem pages, we
 * kmap as needed.
 *
 * Note that the page is unmapped when we return from
 * TCP's data_ready handler, so we may end up mapping and
 * unmapping the same page repeatedly. The whole reason
 * for this is that we shouldn't keep the page mapped
 * outside the softirq.
 */

/**
 * iscsi_tcp_segment_init_sg - init indicated scatterlist entry
 * @segment: the buffer object
 * @sg: scatterlist
 * @offset: byte offset into that sg entry
 *
 * This function sets up the segment so that subsequent
 * data is copied to the indicated sg entry, at the given
 * offset.
 */
static inline void
iscsi_tcp_segment_init_sg(struct iscsi_segment *segment,
			  struct scatterlist *sg, unsigned int offset)
{
	segment->sg = sg;
	segment->sg_offset = offset;
	segment->size = min(sg->length - offset,
			    segment->total_size - segment->total_copied);
	segment->data = NULL;
}

/**
 * iscsi_tcp_segment_map - map the current S/G page
 * @segment: iscsi_segment
 * @recv: 1 if called from recv path
 *
 * We only need to possibly kmap data if scatter lists are being used,
 * because the iscsi passthrough and internal IO paths will never use high
 * mem pages.
 */
static inline void
iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv)
{
	struct scatterlist *sg;

	if (segment->data != NULL || !segment->sg)
		return;

	sg = segment->sg;
	BUG_ON(segment->sg_mapped);
	BUG_ON(sg->length == 0);

	/*
	 * If the page count is greater than one it is ok to send
	 * to the network layer's zero copy send path. If not we
	 * have to go the slow sendmsg path. We always map for the
	 * recv path.
	 */
	if (page_count(sg_page(sg)) >= 1 && !recv)
		return;

	debug_tcp("iscsi_tcp_segment_map %s %p\n", recv ? "recv" : "xmit",
		  segment);
	segment->sg_mapped = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
	segment->data = segment->sg_mapped + sg->offset + segment->sg_offset;
}

static inline void
iscsi_tcp_segment_unmap(struct iscsi_segment *segment)
{
	debug_tcp("iscsi_tcp_segment_unmap %p\n", segment);

	if (segment->sg_mapped) {
		debug_tcp("iscsi_tcp_segment_unmap valid\n");
		kunmap_atomic(segment->sg_mapped, KM_SOFTIRQ0);
		segment->sg_mapped = NULL;
		segment->data = NULL;
	}
}

/*
 * Splice the digest buffer into the buffer
 */
static inline void
iscsi_tcp_segment_splice_digest(struct iscsi_segment *segment, void *digest)
{
	segment->data = digest;
	segment->digest_len = ISCSI_DIGEST_SIZE;
	segment->total_size += ISCSI_DIGEST_SIZE;
	segment->size = ISCSI_DIGEST_SIZE;
	segment->copied = 0;
	segment->sg = NULL;
	segment->hash = NULL;
}

/**
 * iscsi_tcp_segment_done - check whether the segment is complete
 * @segment: iscsi segment to check
 * @recv: set to one if this is called from the recv path
 * @copied: number of bytes copied
 *
 * Check if we're done receiving this segment. If the receive
 * buffer is full but we expect more data, move on to the
 * next entry in the scatterlist.
 *
 * If the amount of data we received isn't a multiple of 4,
 * we will transparently receive the pad bytes, too.
 *
 * This function must be re-entrant.
 */
static inline int
iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv, unsigned copied)
{
	static unsigned char padbuf[ISCSI_PAD_LEN];
	struct scatterlist sg;
	unsigned int pad;
	debug_tcp("copied %u %u size %u %s\n", segment->copied, copied,
		  segment->size, recv ? "recv" : "xmit");
	if (segment->hash && copied) {
		/*
		 * If a segment is kmapped we must unmap it before sending
		 * to the crypto layer since that will try to kmap it again.
		 */
		iscsi_tcp_segment_unmap(segment);

		if (!segment->data) {
			sg_init_table(&sg, 1);
			sg_set_page(&sg, sg_page(segment->sg), copied,
				    segment->copied + segment->sg_offset +
							segment->sg->offset);
		} else
			sg_init_one(&sg, segment->data + segment->copied,
				    copied);
		crypto_hash_update(segment->hash, &sg, copied);
	}

	segment->copied += copied;
	if (segment->copied < segment->size) {
		iscsi_tcp_segment_map(segment, recv);
		return 0;
	}
	segment->total_copied += segment->copied;
	segment->copied = 0;
	segment->size = 0;
	/* Unmap the current scatterlist page, if there is one. */
	iscsi_tcp_segment_unmap(segment);

	/* Do we have more scatterlist entries? */
	debug_tcp("total copied %u total size %u\n", segment->total_copied,
		   segment->total_size);
	if (segment->total_copied < segment->total_size) {
		/* Proceed to the next entry in the scatterlist. */
		iscsi_tcp_segment_init_sg(segment, sg_next(segment->sg),
					  0);
		iscsi_tcp_segment_map(segment, recv);
		BUG_ON(segment->size == 0);
		return 0;
	}

	/* Do we need to handle padding? */
	pad = iscsi_padding(segment->total_copied);
	if (pad != 0) {
		debug_tcp("consume %d pad bytes\n", pad);
		segment->total_size += pad;
		segment->size = pad;
		segment->data = padbuf;
		return 0;
	}

	/*
	 * Set us up for transferring the data digest. hdr digest
	 * is completely handled in hdr done function.
	 */
	if (segment->hash) {
		crypto_hash_final(segment->hash, segment->digest);
		iscsi_tcp_segment_splice_digest(segment,
				 recv ? segment->recv_digest : segment->digest);
		return 0;
	}

	return 1;
}
/**
 * iscsi_tcp_xmit_segment - transmit segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to transmit
 *
 * This function transmits as much of the buffer as
 * the network layer will accept, and returns the number of
 * bytes transmitted.
 *
 * If CRC hashing is enabled, the function will compute the
 * hash as it goes. When the entire segment has been transmitted,
 * it will retrieve the hash value and send it as well.
 */
static int
iscsi_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment)
{
	struct socket *sk = tcp_conn->sock;
	unsigned int copied = 0;
	int r = 0;

	while (!iscsi_tcp_segment_done(segment, 0, r)) {
		struct scatterlist *sg;
		unsigned int offset, copy;
		int flags = 0;

		r = 0;
		offset = segment->copied;
		copy = segment->size - offset;

		if (segment->total_copied + segment->size < segment->total_size)
			flags |= MSG_MORE;

		/* Use sendpage if we can; else fall back to sendmsg */
		if (!segment->data) {
			sg = segment->sg;
			offset += segment->sg_offset + sg->offset;
			r = tcp_conn->sendpage(sk, sg_page(sg), offset, copy,
					       flags);
		} else {
			struct msghdr msg = { .msg_flags = flags };
			struct kvec iov = {
				.iov_base = segment->data + offset,
				.iov_len = copy
			};

			r = kernel_sendmsg(sk, &msg, &iov, 1, copy);
		}

		if (r < 0) {
			iscsi_tcp_segment_unmap(segment);
			if (copied || r == -EAGAIN)
				break;
			return r;
		}
		copied += r;
	}
	return copied;
}

/**
 * iscsi_tcp_segment_recv - copy data to segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to copy to
 * @ptr: data pointer
 * @len: amount of data available
 *
 * This function copies up to @len bytes to the
 * given buffer, and returns the number of bytes
 * consumed, which can actually be less than @len.
 *
 * If hash digest is enabled, the function will update the
 * hash while copying.
 * Combining these two operations doesn't buy us a lot (yet),
 * but in the future we could implement combined copy+crc,
 * just the way we do for network layer checksums.
 */
static int
iscsi_tcp_segment_recv(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment, const void *ptr,
		       unsigned int len)
{
	unsigned int copy = 0, copied = 0;

	while (!iscsi_tcp_segment_done(segment, 1, copy)) {
		if (copied == len) {
			debug_tcp("iscsi_tcp_segment_recv copied %d bytes\n",
				  len);
			break;
		}

		copy = min(len - copied, segment->size - segment->copied);
		debug_tcp("iscsi_tcp_segment_recv copying %d\n", copy);
		memcpy(segment->data + segment->copied, ptr + copied, copy);
		copied += copy;
	}
	return copied;
}

static inline void
iscsi_tcp_dgst_header(struct hash_desc *hash, const void *hdr, size_t hdrlen,
		      unsigned char digest[ISCSI_DIGEST_SIZE])
{
	struct scatterlist sg;
	sg_init_one(&sg, hdr, hdrlen);
	crypto_hash_digest(hash, &sg, hdrlen, digest);
}

static inline int
iscsi_tcp_dgst_verify(struct iscsi_tcp_conn *tcp_conn,
		      struct iscsi_segment *segment)
{
	if (!segment->digest_len)
		return 1;

	if (memcmp(segment->recv_digest, segment->digest,
		   segment->digest_len)) {
		debug_scsi("digest mismatch\n");
		return 0;
	}

	return 1;
}

/*
 * Helper function to set up segment buffer
 */
static inline void
__iscsi_segment_init(struct iscsi_segment *segment, size_t size,
		     iscsi_segment_done_fn_t *done, struct hash_desc *hash)
{
	memset(segment, 0, sizeof(*segment));
	segment->total_size = size;
	segment->done = done;

	if (hash) {
		segment->hash = hash;
		crypto_hash_init(hash);
	}
}

static inline void
iscsi_segment_init_linear(struct iscsi_segment *segment, void *data,
			  size_t size, iscsi_segment_done_fn_t *done,
			  struct hash_desc *hash)
{
	__iscsi_segment_init(segment, size, done, hash);
	segment->data = data;
	segment->size = size;
}

static inline int
iscsi_segment_seek_sg(struct iscsi_segment *segment,
		      struct scatterlist *sg_list, unsigned int sg_count,
		      unsigned int offset, size_t size,
		      iscsi_segment_done_fn_t *done, struct hash_desc *hash)
{
	struct scatterlist *sg;
	unsigned int i;

	debug_scsi("iscsi_segment_seek_sg offset %u size %llu\n",
		  offset, size);
	__iscsi_segment_init(segment, size, done, hash);
	for_each_sg(sg_list, sg, sg_count, i) {
		debug_scsi("sg %d, len %u offset %u\n", i, sg->length,
			   sg->offset);
		if (offset < sg->length) {
			iscsi_tcp_segment_init_sg(segment, sg, offset);
			return 0;
		}
		offset -= sg->length;
	}

	return ISCSI_ERR_DATA_OFFSET;
}

/**
 * iscsi_tcp_hdr_recv_prep - prep segment for hdr reception
 * @tcp_conn: iscsi connection to prep for
 *
 * This function always passes NULL for the hash argument, because when this
 * function is called we do not yet know the final size of the header and want
 * to delay the digest processing until we know that.
 */
static void
iscsi_tcp_hdr_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	debug_tcp("iscsi_tcp_hdr_recv_prep(%p%s)\n", tcp_conn,
		  tcp_conn->iscsi_conn->hdrdgst_en ? ", digest enabled" : "");
	iscsi_segment_init_linear(&tcp_conn->in.segment,
				tcp_conn->in.hdr_buf, sizeof(struct iscsi_hdr),
				iscsi_tcp_hdr_recv_done, NULL);
}

/*
 * Handle incoming reply to any other type of command
 */
static int
iscsi_tcp_data_recv_done(struct iscsi_tcp_conn *tcp_conn,
			 struct iscsi_segment *segment)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	int rc = 0;

	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
		return ISCSI_ERR_DATA_DGST;

	rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr,
			conn->data, tcp_conn->in.datalen);
	if (rc)
		return rc;

	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

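/*
 * Prepare the receive segment to copy the payload of a non-data PDU
 * into conn->data, updating the data digest along the way if it is
 * enabled on this connection.
 */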
static void
iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct hash_desc *rx_hash = NULL;

	if (conn->datadgst_en)
		rx_hash = &tcp_conn->rx_hash;

	iscsi_segment_init_linear(&tcp_conn->in.segment,
				conn->data, tcp_conn->in.datalen,
				iscsi_tcp_data_recv_done, rx_hash);
}

/*
 * must be called with session lock
 */
static void
iscsi_tcp_cleanup_task(struct iscsi_conn *conn, struct iscsi_task *task)
{
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct iscsi_r2t_info *r2t;
	/* nothing to do for mgmt tasks */
	if (!task->sc)
		return;

	/* flush task's r2t queues */
	while (__kfifo_get(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		debug_scsi("iscsi_tcp_cleanup_task pending r2t dropped\n");
	}

	r2t = tcp_task->r2t;
	if (r2t != NULL) {
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		tcp_task->r2t = NULL;
	}
}

/**
 * iscsi_data_in - SCSI Data-In Response processing
 * @conn: iscsi connection
 * @task: scsi command task
 **/
static int
iscsi_data_in(struct iscsi_conn *conn, struct iscsi_task *task)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
	int datasn = be32_to_cpu(rhdr->datasn);
	unsigned total_in_length = scsi_in(task->sc)->length;
	iscsi_update_cmdsn(conn->session, (struct iscsi_nopin*)rhdr);
	if (tcp_conn->in.datalen == 0)
		return 0;

	if (tcp_task->exp_datasn != datasn) {
		debug_tcp("%s: task->exp_datasn(%d) != rhdr->datasn(%d)\n",
		          __func__, tcp_task->exp_datasn, datasn);
		return ISCSI_ERR_DATASN;
	}

	tcp_task->exp_datasn++;
	tcp_task->data_offset = be32_to_cpu(rhdr->offset);
	if (tcp_task->data_offset + tcp_conn->in.datalen > total_in_length) {
		debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n",
		          __func__, tcp_task->data_offset,
		          tcp_conn->in.datalen, total_in_length);
		return ISCSI_ERR_DATA_OFFSET;
	}

	conn->datain_pdus_cnt++;
	return 0;
}

/**
 * iscsi_solicit_data_init - initialize first Data-Out
 * @conn: iscsi connection
 * @task: scsi command task
 * @r2t: R2T info
 *
 * Notes:
 *	Initialize first Data-Out within this R2T sequence and find
 *	the proper data_offset within this SCSI command.
 *
 *	This function is called with connection lock taken.
 **/
static void
iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_task *task,
			struct iscsi_r2t_info *r2t)
{
	struct iscsi_data *hdr;

	hdr = &r2t->dtask.hdr;
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
	memcpy(hdr->lun, task->hdr->lun, sizeof(hdr->lun));
	hdr->itt = task->hdr->itt;
	hdr->exp_statsn = r2t->exp_statsn;
	hdr->offset = cpu_to_be32(r2t->data_offset);
	if (r2t->data_length > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
		hdr->flags = 0;
	} else {
		hton24(hdr->dlength, r2t->data_length);
		r2t->data_count = r2t->data_length;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}
	conn->dataout_pdus_cnt++;

	r2t->sent = 0;
}

/**
 * iscsi_r2t_rsp - iSCSI R2T Response processing
 * @conn: iscsi connection
 * @task: scsi command task
 **/
static int
iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
{
	struct iscsi_r2t_info *r2t;
	struct iscsi_session *session = conn->session;
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
	int r2tsn = be32_to_cpu(rhdr->r2tsn);
	int rc;

	if (tcp_conn->in.datalen) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2t with datalen %d\n",
				  tcp_conn->in.datalen);
		return ISCSI_ERR_DATALEN;
	}

	if (tcp_task->exp_datasn != r2tsn){
		debug_tcp("%s: task->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
		          __func__, tcp_task->exp_datasn, r2tsn);
		return ISCSI_ERR_R2TSN;
	}

	/* fill-in new R2T associated with the task */
	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);

	if (!task->sc || session->state != ISCSI_STATE_LOGGED_IN) {
		iscsi_conn_printk(KERN_INFO, conn,
				  "dropping R2T itt %d in recovery.\n",
				  task->itt);
		return 0;
	}
	rc = __kfifo_get(tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
	BUG_ON(!rc);

	r2t->exp_statsn = rhdr->statsn;
	r2t->data_length = be32_to_cpu(rhdr->data_length);
	if (r2t->data_length == 0) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with zero data len\n");
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		return ISCSI_ERR_DATALEN;
	}

	if (r2t->data_length > session->max_burst)
		debug_scsi("invalid R2T with data len %u and max burst %u."
			   "Attempting to execute request.\n",
			    r2t->data_length, session->max_burst);

	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
	if (r2t->data_offset + r2t->data_length > scsi_out(task->sc)->length) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with data len %u at offset %u "
				  "and total length %d\n", r2t->data_length,
				  r2t->data_offset, scsi_out(task->sc)->length);
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		return ISCSI_ERR_DATALEN;
	}

	r2t->ttt = rhdr->ttt; /* no flip */
	r2t->solicit_datasn = 0;

	iscsi_solicit_data_init(conn, task, r2t);
	tcp_task->exp_datasn = r2tsn + 1;
	__kfifo_put(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
	conn->r2t_pdus_cnt++;
	iscsi_requeue_task(task);
	return 0;
}

/*
 * Handle incoming reply to DataIn command
 */
static int
iscsi_tcp_process_data_in(struct iscsi_tcp_conn *tcp_conn,
			  struct iscsi_segment *segment)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr = tcp_conn->in.hdr;
	int rc;

	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
		return ISCSI_ERR_DATA_DGST;

	/* check for non-exceptional status */
	if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
		rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
		if (rc)
			return rc;
	}

	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

/**
 * iscsi_tcp_hdr_dissect - process PDU header
 * @conn: iSCSI connection
 * @hdr: PDU header
 *
 * This function analyzes the header of the PDU received,
 * and performs several sanity checks. If the PDU is accompanied
 * by data, the receive buffer is set up to copy the incoming data
 * to the correct location.
 */
static int
iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
{
	int rc = 0, opcode, ahslen;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_task *task;

	/* verify PDU length */
	tcp_conn->in.datalen = ntoh24(hdr->dlength);
	if (tcp_conn->in.datalen > conn->max_recv_dlength) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "iscsi_tcp: datalen %d > %d\n",
				  tcp_conn->in.datalen, conn->max_recv_dlength);
		return ISCSI_ERR_DATALEN;
	}

	/* Additional header segments. So far, we don't
	 * process additional headers.
	 */
	ahslen = hdr->hlength << 2;
	opcode = hdr->opcode & ISCSI_OPCODE_MASK;
	/* verify itt (itt encoding: age+cid+itt) */
	rc = iscsi_verify_itt(conn, hdr->itt);
	if (rc)
		return rc;
	debug_tcp("opcode 0x%x ahslen %d datalen %d\n",
		  opcode, ahslen, tcp_conn->in.datalen);
	switch(opcode) {
	case ISCSI_OP_SCSI_DATA_IN:
		spin_lock(&conn->session->lock);
		task = iscsi_itt_to_ctask(conn, hdr->itt);
		if (!task)
			rc = ISCSI_ERR_BAD_ITT;
		else
			rc = iscsi_data_in(conn, task);
		if (rc) {
			spin_unlock(&conn->session->lock);
			break;
		}
		if (tcp_conn->in.datalen) {
			struct iscsi_tcp_task *tcp_task = task->dd_data;
			struct hash_desc *rx_hash = NULL;
			struct scsi_data_buffer *sdb = scsi_in(task->sc);

			/*
			 * Setup copy of Data-In into the Scsi_Cmnd
			 * Scatterlist case:
			 * We set up the iscsi_segment to point to the next
			 * scatterlist entry to copy to. As we go along,
			 * we move on to the next scatterlist entry and
			 * update the digest per-entry.
			 */
			if (conn->datadgst_en)
				rx_hash = &tcp_conn->rx_hash;

			debug_tcp("iscsi_tcp_begin_data_in(%p, offset=%d, "
				  "datalen=%d)\n", tcp_conn,
				  tcp_task->data_offset,
				  tcp_conn->in.datalen);
			rc = iscsi_segment_seek_sg(&tcp_conn->in.segment,
						   sdb->table.sgl,
						   sdb->table.nents,
						   tcp_task->data_offset,
						   tcp_conn->in.datalen,
						   iscsi_tcp_process_data_in,
						   rx_hash);
			spin_unlock(&conn->session->lock);
			return rc;
		}
		rc = __iscsi_complete_pdu(conn, hdr, NULL, 0);
		spin_unlock(&conn->session->lock);
		break;
	case ISCSI_OP_SCSI_CMD_RSP:
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
		break;
	case ISCSI_OP_R2T:
		spin_lock(&conn->session->lock);
		task = iscsi_itt_to_ctask(conn, hdr->itt);
		if (!task)
			rc = ISCSI_ERR_BAD_ITT;
		else if (ahslen)
			rc = ISCSI_ERR_AHSLEN;
		else if (task->sc->sc_data_direction == DMA_TO_DEVICE)
			rc = iscsi_r2t_rsp(conn, task);
		else
			rc = ISCSI_ERR_PROTO;
		spin_unlock(&conn->session->lock);
		break;
	case ISCSI_OP_LOGIN_RSP:
	case ISCSI_OP_TEXT_RSP:
	case ISCSI_OP_REJECT:
	case ISCSI_OP_ASYNC_EVENT:
		/*
		 * It is possible that we could get a PDU with a buffer larger
		 * than 8K, but there are no targets that currently do this.
		 * For now we fail until we find a vendor that needs it.
		 */
		if (ISCSI_DEF_MAX_RECV_SEG_LEN < tcp_conn->in.datalen) {
			iscsi_conn_printk(KERN_ERR, conn,
					  "iscsi_tcp: received buffer of "
					  "len %u but conn buffer is only %u "
					  "(opcode %0x)\n",
					  tcp_conn->in.datalen,
					  ISCSI_DEF_MAX_RECV_SEG_LEN, opcode);
			rc = ISCSI_ERR_PROTO;
			break;
		}

		/* If there's data coming in with the response,
		 * receive it to the connection's buffer.
		 */
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
	/* fall through */
	case ISCSI_OP_LOGOUT_RSP:
	case ISCSI_OP_NOOP_IN:
	case ISCSI_OP_SCSI_TMFUNC_RSP:
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
		break;
	default:
		rc = ISCSI_ERR_BAD_OPCODE;
		break;
	}
	if (rc == 0) {
		/* Anything that comes with data should have
		 * been handled above. */
		if (tcp_conn->in.datalen)
			return ISCSI_ERR_PROTO;
		iscsi_tcp_hdr_recv_prep(tcp_conn);
	}

	return rc;
}

/**
 * iscsi_tcp_hdr_recv_done - process PDU header
 *
 * This is the callback invoked when the PDU header has
 * been received. If the header is followed by additional
 * header segments, we go back for more data.
 */
static int
iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
			struct iscsi_segment *segment)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr;
	/* Check if there are additional header segments
	 * *prior* to computing the digest, because we
	 * may need to go back to the caller for more.
	 */
	hdr = (struct iscsi_hdr *) tcp_conn->in.hdr_buf;
	if (segment->copied == sizeof(struct iscsi_hdr) && hdr->hlength) {
		/* Bump the header length - the caller will
		 * just loop around and get the AHS for us, and
		 * call again. */
		unsigned int ahslen = hdr->hlength << 2;

		/* Make sure we don't overflow */
		if (sizeof(*hdr) + ahslen > sizeof(tcp_conn->in.hdr_buf))
			return ISCSI_ERR_AHSLEN;

		segment->total_size += ahslen;
		segment->size += ahslen;
		return 0;
	}

	/* We're done processing the header. See if we're doing
	 * header digests; if so, set up the recv_digest buffer
	 * and go back for more. */
	if (conn->hdrdgst_en) {
		if (segment->digest_len == 0) {
			iscsi_tcp_segment_splice_digest(segment,
							segment->recv_digest);
			return 0;
		}
		iscsi_tcp_dgst_header(&tcp_conn->rx_hash, hdr,
				      segment->total_copied - ISCSI_DIGEST_SIZE,
				      segment->digest);
		if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
			return ISCSI_ERR_HDR_DGST;
	}

	tcp_conn->in.hdr = hdr;
	return iscsi_tcp_hdr_dissect(conn, hdr);
}

/**
 * iscsi_tcp_recv - TCP receive in sendfile fashion
 * @rd_desc: read descriptor
 * @skb: socket buffer
 * @offset: offset in skb
 * @len: skb->len - offset
 **/
static int
iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
	       unsigned int offset, size_t len)
{
	struct iscsi_conn *conn = rd_desc->arg.data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->in.segment;
	struct skb_seq_state seq;
	unsigned int consumed = 0;
	int rc = 0;
	debug_tcp("in %d bytes\n", skb->len - offset);

	if (unlikely(conn->suspend_rx)) {
		debug_tcp("conn %d Rx suspended!\n", conn->id);
		return 0;
	}

	skb_prepare_seq_read(skb, offset, skb->len, &seq);
	while (1) {
		unsigned int avail;
		const u8 *ptr;
		avail = skb_seq_read(consumed, &ptr, &seq);
		if (avail == 0) {
			debug_tcp("no more data avail. Consumed %d\n",
				  consumed);
			break;
		}
		BUG_ON(segment->copied >= segment->size);

		debug_tcp("skb %p ptr=%p avail=%u\n", skb, ptr, avail);
		rc = iscsi_tcp_segment_recv(tcp_conn, segment, ptr, avail);
		BUG_ON(rc == 0);
		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			debug_tcp("segment done\n");
			rc = segment->done(tcp_conn, segment);
			if (rc != 0) {
				skb_abort_seq_read(&seq);
				goto error;
			}

			/* The done() function sets up the
			 * next segment. */
		}
	}
	skb_abort_seq_read(&seq);
	conn->rxdata_octets += consumed;
	return consumed;
error:
	debug_tcp("Error receiving PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	return 0;
}

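/*
 * Socket data_ready callback: called in softirq context when new data
 * arrives. Feed the skbs to iscsi_tcp_recv() via tcp_read_sock() and
 * unmap any scatterlist page that was left kmapped by the recv path.
 */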
static void
iscsi_tcp_data_ready(struct sock *sk, int flag)
{
	struct iscsi_conn *conn = sk->sk_user_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	read_descriptor_t rd_desc;

	read_lock(&sk->sk_callback_lock);

	/*
	 * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
	 * We set count to 1 because we want the network layer to
	 * hand us all the skbs that are available. iscsi_tcp_recv
	 * handles pdus that cross buffers or pdus that still need data.
	 */
	rd_desc.arg.data = conn;
	rd_desc.count = 1;
	tcp_read_sock(sk, &rd_desc, iscsi_tcp_recv);

	read_unlock(&sk->sk_callback_lock);

	/* If we had to (atomically) map a highmem page,
	 * unmap it now. */
	iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
}

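/*
 * Socket state-change callback: if the TCP connection has been closed
 * underneath us, flag a connection failure so recovery can kick in,
 * then chain to the old state_change handler.
 */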
static void
iscsi_tcp_state_change(struct sock *sk)
{
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_conn *conn;
	struct iscsi_session *session;
	void (*old_state_change)(struct sock *);

	read_lock(&sk->sk_callback_lock);

	conn = (struct iscsi_conn*)sk->sk_user_data;
	session = conn->session;

	if ((sk->sk_state == TCP_CLOSE_WAIT ||
	     sk->sk_state == TCP_CLOSE) &&
	    !atomic_read(&sk->sk_rmem_alloc)) {
		debug_tcp("iscsi_tcp_state_change: TCP_CLOSE|TCP_CLOSE_WAIT\n");
		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	}

	tcp_conn = conn->dd_data;
	old_state_change = tcp_conn->old_state_change;

	read_unlock(&sk->sk_callback_lock);

	old_state_change(sk);
}

/**
 * iscsi_write_space - Called when more output buffer space is available
 * @sk: socket space is available for
 **/
static void
iscsi_write_space(struct sock *sk)
{
	struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	tcp_conn->old_write_space(sk);
	debug_tcp("iscsi_write_space: cid %d\n", conn->id);
	scsi_queue_work(conn->session->host, &conn->xmitwork);
}

static void
iscsi_conn_set_callbacks(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct sock *sk = tcp_conn->sock->sk;

	/* assign new callbacks */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = conn;
	tcp_conn->old_data_ready = sk->sk_data_ready;
	tcp_conn->old_state_change = sk->sk_state_change;
	tcp_conn->old_write_space = sk->sk_write_space;
	sk->sk_data_ready = iscsi_tcp_data_ready;
	sk->sk_state_change = iscsi_tcp_state_change;
	sk->sk_write_space = iscsi_write_space;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void
iscsi_conn_restore_callbacks(struct iscsi_tcp_conn *tcp_conn)
{
	struct sock *sk = tcp_conn->sock->sk;

	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data    = NULL;
	sk->sk_data_ready   = tcp_conn->old_data_ready;
	sk->sk_state_change = tcp_conn->old_state_change;
	sk->sk_write_space  = tcp_conn->old_write_space;
	sk->sk_no_check	 = 0;
	write_unlock_bh(&sk->sk_callback_lock);
}

/**
 * iscsi_xmit - TCP transmit
 **/
static int
iscsi_xmit(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->out.segment;
	unsigned int consumed = 0;
	int rc = 0;
	while (1) {
		rc = iscsi_tcp_xmit_segment(tcp_conn, segment);
		if (rc < 0)
			goto error;
		if (rc == 0)
			break;

		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			if (segment->done != NULL) {
				rc = segment->done(tcp_conn, segment);
				if (rc < 0)
					goto error;
			}
		}
	}

	debug_tcp("xmit %d bytes\n", consumed);

	conn->txdata_octets += consumed;
	return consumed;

error:
	/* Transmit error. We could initiate error recovery
	 * here. */
	debug_tcp("Error sending PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	return rc;
}

/**
 * iscsi_tcp_xmit_qlen - return the number of bytes queued for xmit
 */
static inline int
iscsi_tcp_xmit_qlen(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->out.segment;
	return segment->total_copied - segment->total_size;
}

static inline int
iscsi_tcp_flush(struct iscsi_conn *conn)
{
	int rc;

	while (iscsi_tcp_xmit_qlen(conn)) {
		rc = iscsi_xmit(conn);
		if (rc == 0)
			return -EAGAIN;
		if (rc < 0)
			return rc;
	}

	return 0;
}

/*
 * This is called when we're done sending the header.
 * Simply copy the data_segment to the send segment, and return.
 */
static int
iscsi_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn,
			struct iscsi_segment *segment)
{
	tcp_conn->out.segment = tcp_conn->out.data_segment;
	debug_tcp("Header done. Next segment size %u total_size %u\n",
		  tcp_conn->out.segment.size, tcp_conn->out.segment.total_size);
	return 0;
}

static void
iscsi_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr, size_t hdrlen)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	debug_tcp("%s(%p%s)\n", __func__, tcp_conn,
			conn->hdrdgst_en? ", digest enabled" : "");

	/* Clear the data segment - needs to be filled in by the
	 * caller using iscsi_tcp_send_data_prep() */
	memset(&tcp_conn->out.data_segment, 0, sizeof(struct iscsi_segment));

	/* If header digest is enabled, compute the CRC and
	 * place the digest into the same buffer. We make
	 * sure that both iscsi_tcp_task and mtask have
	 * sufficient room.
	 */
	if (conn->hdrdgst_en) {
		iscsi_tcp_dgst_header(&tcp_conn->tx_hash, hdr, hdrlen,
				      hdr + hdrlen);
		hdrlen += ISCSI_DIGEST_SIZE;
	}

	/* Remember header pointer for later, when we need
	 * to decide whether there's a payload to go along
	 * with the header. */
	tcp_conn->out.hdr = hdr;

	iscsi_segment_init_linear(&tcp_conn->out.segment, hdr, hdrlen,
				iscsi_tcp_send_hdr_done, NULL);
}

/*
 * Prepare the send buffer for the payload data.
 * Padding and checksumming will all be taken care
 * of by the iscsi_segment routines.
 */
static int
iscsi_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg,
			 unsigned int count, unsigned int offset,
			 unsigned int len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

	debug_tcp("%s(%p, offset=%d, datalen=%d%s)\n", __func__,
			tcp_conn, offset, len,
			conn->datadgst_en? ", digest enabled" : "");

	/* Make sure the datalen matches what the caller
	   said he would send. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
		tx_hash = &tcp_conn->tx_hash;

	return iscsi_segment_seek_sg(&tcp_conn->out.data_segment,
				   sg, count, offset, len,
				   NULL, tx_hash);
}

static void
iscsi_tcp_send_linear_data_prepare(struct iscsi_conn *conn, void *data,
				   size_t len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

	debug_tcp("%s(%p, datalen=%d%s)\n", __func__, tcp_conn, len,
		  conn->datadgst_en? ", digest enabled" : "");

	/* Make sure the datalen matches what the caller
	   said he would send. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
		tx_hash = &tcp_conn->tx_hash;

	iscsi_segment_init_linear(&tcp_conn->out.data_segment,
				data, len, NULL, tx_hash);
}

/**
 * iscsi_solicit_data_cont - initialize next Data-Out
 * @conn: iscsi connection
 * @task: scsi command task
 * @r2t: R2T info
 * @left: bytes left to transfer
 *
 * Notes:
 *	Initialize next Data-Out within this R2T sequence and continue
 *	to process next Scatter-Gather element(if any) of this SCSI command.
 *
 *	Called under connection lock.
 **/
static int
iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_task *task,
			struct iscsi_r2t_info *r2t)
{
	struct iscsi_data *hdr;
	int new_offset, left;

	BUG_ON(r2t->data_length - r2t->sent < 0);
	left = r2t->data_length - r2t->sent;
	if (left == 0)
		return 0;

	hdr = &r2t->dtask.hdr;
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
	memcpy(hdr->lun, task->hdr->lun, sizeof(hdr->lun));
	hdr->itt = task->hdr->itt;
	hdr->exp_statsn = r2t->exp_statsn;
	new_offset = r2t->data_offset + r2t->sent;
	hdr->offset = cpu_to_be32(new_offset);
	if (left > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
	} else {
		hton24(hdr->dlength, left);
		r2t->data_count = left;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}

	conn->dataout_pdus_cnt++;
	return 1;
}

/**
 * iscsi_tcp_task_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
 * @conn: iscsi connection
 * @task: scsi command task
 **/
static int
iscsi_tcp_task_init(struct iscsi_task *task)
{
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct iscsi_conn *conn = task->conn;
	struct scsi_cmnd *sc = task->sc;
	int err;

	if (!sc) {
		/*
		 * mgmt tasks do not have a scatterlist since they come
		 * in from the iscsi interface.
		 */
		debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id,
			   task->itt);

		/* Prepare PDU, optionally w/ immediate data */
		iscsi_tcp_send_hdr_prep(conn, task->hdr, sizeof(*task->hdr));

		/* If we have immediate data, attach a payload */
		if (task->data_count)
			iscsi_tcp_send_linear_data_prepare(conn, task->data,
							   task->data_count);
		return 0;
	}

	BUG_ON(__kfifo_len(tcp_task->r2tqueue));
	tcp_task->sent = 0;
	tcp_task->exp_datasn = 0;

	/* Prepare PDU, optionally w/ immediate data */
	debug_scsi("task deq [cid %d itt 0x%x imm %d unsol %d]\n",
		    conn->id, task->itt, task->imm_count,
		    task->unsol_count);
	iscsi_tcp_send_hdr_prep(conn, task->hdr, task->hdr_len);

	if (!task->imm_count)
		return 0;

	/* If we have immediate data, attach a payload */
	err = iscsi_tcp_send_data_prep(conn, scsi_out(sc)->table.sgl,
				       scsi_out(sc)->table.nents,
				       0, task->imm_count);
	if (err)
		return err;
	tcp_task->sent += task->imm_count;
	task->imm_count = 0;
	return 0;
}

/*
 * iscsi_tcp_task_xmit - xmit normal PDU task
 * @task: iscsi command task
 *
 * We're expected to return 0 when everything was transmitted successfully,
 * -EAGAIN if there's still data in the queue, or != 0 for any other kind
 * of error.
 */
static int
iscsi_tcp_task_xmit(struct iscsi_task *task)
{
	struct iscsi_conn *conn = task->conn;
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct scsi_cmnd *sc = task->sc;
	struct scsi_data_buffer *sdb;
	int rc = 0;

flush:
	/* Flush any pending data first. */
	rc = iscsi_tcp_flush(conn);
	if (rc < 0)
		return rc;

	/* mgmt command */
	if (!sc) {
		if (task->hdr->itt == RESERVED_ITT)
			iscsi_put_task(task);
		return 0;
	}

	/* Are we done already? */
	if (sc->sc_data_direction != DMA_TO_DEVICE)
		return 0;

	sdb = scsi_out(sc);
	if (task->unsol_count != 0) {
		struct iscsi_data *hdr = &tcp_task->unsol_dtask.hdr;

		/* Prepare a header for the unsolicited PDU.
		 * The amount of data we want to send will be
		 * in task->data_count.
		 * FIXME: return the data count instead.
		 */
		iscsi_prep_unsolicit_data_pdu(task, hdr);

		debug_tcp("unsol dout [itt 0x%x doff %d dlen %d]\n",
				task->itt, tcp_task->sent, task->data_count);

		iscsi_tcp_send_hdr_prep(conn, hdr, sizeof(*hdr));
		rc = iscsi_tcp_send_data_prep(conn, sdb->table.sgl,
					      sdb->table.nents, tcp_task->sent,
					      task->data_count);
		if (rc)
			goto fail;
		tcp_task->sent += task->data_count;
		task->unsol_count -= task->data_count;
		goto flush;
	} else {
		struct iscsi_session *session = conn->session;
		struct iscsi_r2t_info *r2t;

		/* All unsolicited PDUs sent. Check for solicited PDUs.
		 */
		spin_lock_bh(&session->lock);
		r2t = tcp_task->r2t;
		if (r2t != NULL) {
			/* Continue with this R2T? */
			if (!iscsi_solicit_data_cont(conn, task, r2t)) {
				debug_scsi("  done with r2t %p\n", r2t);

				__kfifo_put(tcp_task->r2tpool.queue,
					    (void*)&r2t, sizeof(void*));
				tcp_task->r2t = r2t = NULL;
			}
		}

		if (r2t == NULL) {
			__kfifo_get(tcp_task->r2tqueue, (void*)&tcp_task->r2t,
				    sizeof(void*));
			r2t = tcp_task->r2t;
		}
		spin_unlock_bh(&session->lock);

		/* Waiting for more R2Ts to arrive. */
		if (r2t == NULL) {
			debug_tcp("no R2Ts yet\n");
			return 0;
		}

		debug_scsi("sol dout %p [dsn %d itt 0x%x doff %d dlen %d]\n",
			r2t, r2t->solicit_datasn - 1, task->itt,
			r2t->data_offset + r2t->sent, r2t->data_count);

		iscsi_tcp_send_hdr_prep(conn, &r2t->dtask.hdr,
					sizeof(struct iscsi_hdr));

		rc = iscsi_tcp_send_data_prep(conn, sdb->table.sgl,
					      sdb->table.nents,
					      r2t->data_offset + r2t->sent,
					      r2t->data_count);
		if (rc)
			goto fail;
		tcp_task->sent += r2t->data_count;
		r2t->sent += r2t->data_count;
		goto flush;
	}
	return 0;
fail:
	iscsi_conn_failure(conn, rc);
	return -EIO;
}

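/*
 * Allocate the iSCSI class connection plus its TCP-specific data and
 * the crc32c transforms used for header and data digests.
 */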
static struct iscsi_cls_conn *
iscsi_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
{
	struct iscsi_conn *conn;
	struct iscsi_cls_conn *cls_conn;
	struct iscsi_tcp_conn *tcp_conn;

	cls_conn = iscsi_conn_setup(cls_session, sizeof(*tcp_conn), conn_idx);
	if (!cls_conn)
		return NULL;
	conn = cls_conn->dd_data;
	/*
	 * due to strange issues with iser these are not set
	 * in iscsi_conn_setup
	 */
	conn->max_recv_dlength = ISCSI_DEF_MAX_RECV_SEG_LEN;

	tcp_conn = conn->dd_data;
	tcp_conn->iscsi_conn = conn;

	tcp_conn->tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						  CRYPTO_ALG_ASYNC);
	tcp_conn->tx_hash.flags = 0;
	if (IS_ERR(tcp_conn->tx_hash.tfm))
		goto free_conn;

	tcp_conn->rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						  CRYPTO_ALG_ASYNC);
	tcp_conn->rx_hash.flags = 0;
	if (IS_ERR(tcp_conn->rx_hash.tfm))
		goto free_tx_tfm;

	return cls_conn;

free_tx_tfm:
	crypto_free_hash(tcp_conn->tx_hash.tfm);
free_conn:
	iscsi_conn_printk(KERN_ERR, conn,
			  "Could not create connection due to crc32c "
			  "loading error. Make sure the crc32c "
			  "module is built as a module or into the "
			  "kernel\n");
	iscsi_conn_teardown(cls_conn);
	return NULL;
}

static void
iscsi_tcp_release_conn(struct iscsi_conn *conn)
{
	struct iscsi_session *session = conn->session;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct socket *sock = tcp_conn->sock;

	if (!sock)
		return;

	sock_hold(sock->sk);
	iscsi_conn_restore_callbacks(tcp_conn);
	sock_put(sock->sk);

	spin_lock_bh(&session->lock);
	tcp_conn->sock = NULL;
	spin_unlock_bh(&session->lock);
	sockfd_put(sock);
}

static void
iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	iscsi_tcp_release_conn(conn);

	if (tcp_conn->tx_hash.tfm)
		crypto_free_hash(tcp_conn->tx_hash.tfm);
	if (tcp_conn->rx_hash.tfm)
		crypto_free_hash(tcp_conn->rx_hash.tfm);

	iscsi_conn_teardown(cls_conn);
}

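/*
 * Stop the connection: suspend the receive path under the socket
 * callback lock first, so no further data_ready callbacks race with
 * the teardown, then release the socket.
 */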
static void
iscsi_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	/* userspace may have goofed up and not bound us */
	if (!tcp_conn->sock)
		return;
	/*
	 * Make sure our recv side is stopped.
	 * Older tools called conn stop before ep_disconnect
	 * so IO could still be coming in.
	 */
	write_lock_bh(&tcp_conn->sock->sk->sk_callback_lock);
	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
	write_unlock_bh(&tcp_conn->sock->sk->sk_callback_lock);

	iscsi_conn_stop(cls_conn, flag);
	iscsi_tcp_release_conn(conn);
}

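/*
 * Format the socket's local or peer address (IPv4 or IPv6) into a text
 * buffer and return the port, using the getname routine passed in
 * (kernel_getsockname or kernel_getpeername).
 */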
static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
			      char *buf, int *port,
			      int (*getname)(struct socket *, struct sockaddr *,
					int *addrlen))
{
	struct sockaddr_storage *addr;
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	int rc = 0, len;

	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
	if (!addr)
		return -ENOMEM;

	if (getname(sock, (struct sockaddr *) addr, &len)) {
		rc = -ENODEV;
		goto free_addr;
	}

	switch (addr->ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
		*port = be16_to_cpu(sin->sin_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, NIP6_FMT, NIP6(sin6->sin6_addr));
		*port = be16_to_cpu(sin6->sin6_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	}
free_addr:
	kfree(addr);
	return rc;
}

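/*
 * Bind the connection to the socket userspace created: look the socket
 * up by file descriptor, record the portal and local addresses, install
 * our socket callbacks and prime the receive state machine.
 */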
static int
iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
		    struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
		    int is_leading)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
	struct iscsi_host *ihost = shost_priv(shost);
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct sock *sk;
	struct socket *sock;
	int err;

	/* lookup for existing socket */
	sock = sockfd_lookup((int)transport_eph, &err);
	if (!sock) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "sockfd_lookup failed %d\n", err);
		return -EEXIST;
	}
	/*
	 * copy these values now because if we drop the session
	 * userspace may still want to query the values since we will
	 * be using them for the reconnect
	 */
	err = iscsi_tcp_get_addr(conn, sock, conn->portal_address,
				 &conn->portal_port, kernel_getpeername);
	if (err)
		goto free_socket;

	err = iscsi_tcp_get_addr(conn, sock, ihost->local_address,
				&ihost->local_port, kernel_getsockname);
	if (err)
		goto free_socket;

	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
	if (err)
		goto free_socket;

	/* bind iSCSI connection and socket */
	tcp_conn->sock = sock;

	/* setup Socket parameters */
	sk = sock->sk;
	sk->sk_reuse = 1;
	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
	sk->sk_allocation = GFP_ATOMIC;

	iscsi_conn_set_callbacks(conn);
	tcp_conn->sendpage = tcp_conn->sock->ops->sendpage;
	/*
	 * set receive state machine into initial state
	 */
	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;

free_socket:
	sockfd_put(sock);
	return err;
}

static int
iscsi_r2tpool_alloc(struct iscsi_session *session)
{
	int i;
	int cmd_i;

	/*
	 * initialize per-task: R2T pool and xmit queue
	 */
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
	        struct iscsi_task *task = session->cmds[cmd_i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;

		/*
		 * pre-allocate 4x as many r2ts to handle the race where the
		 * target acks Data-Out faster than data_xmit() can
		 * replenish the r2tqueue.
		 */

		/* R2T pool */
		if (iscsi_pool_init(&tcp_task->r2tpool, session->max_r2t * 4, NULL,
				    sizeof(struct iscsi_r2t_info))) {
			goto r2t_alloc_fail;
		}

		/* R2T xmit queue */
		tcp_task->r2tqueue = kfifo_alloc(
		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL);
		if (tcp_task->r2tqueue == ERR_PTR(-ENOMEM)) {
			iscsi_pool_free(&tcp_task->r2tpool);
			goto r2t_alloc_fail;
		}
	}

	return 0;

r2t_alloc_fail:
	for (i = 0; i < cmd_i; i++) {
		struct iscsi_task *task = session->cmds[i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;

		kfifo_free(tcp_task->r2tqueue);
		iscsi_pool_free(&tcp_task->r2tpool);
	}
	return -ENOMEM;
}

static void
iscsi_r2tpool_free(struct iscsi_session *session)
{
	int i;

	for (i = 0; i < session->cmds_max; i++) {
		struct iscsi_task *task = session->cmds[i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;

		kfifo_free(tcp_task->r2tqueue);
		iscsi_pool_free(&tcp_task->r2tpool);
	}
}

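/*
 * Set a connection parameter. Data-digest changes switch between
 * sendpage and sock_no_sendpage, and MAX_R2T changes reallocate the
 * per-task R2T pools.
 */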
static int
iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
		     char *buf, int buflen)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_session *session = conn->session;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	int value;

	switch(param) {
	case ISCSI_PARAM_HDRDGST_EN:
		iscsi_set_param(cls_conn, param, buf, buflen);
		break;
	case ISCSI_PARAM_DATADGST_EN:
		iscsi_set_param(cls_conn, param, buf, buflen);
		tcp_conn->sendpage = conn->datadgst_en ?
			sock_no_sendpage : tcp_conn->sock->ops->sendpage;
		break;
	case ISCSI_PARAM_MAX_R2T:
		sscanf(buf, "%d", &value);
		if (value <= 0 || !is_power_of_2(value))
			return -EINVAL;
		if (session->max_r2t == value)
			break;
		iscsi_r2tpool_free(session);
		iscsi_set_param(cls_conn, param, buf, buflen);
		if (iscsi_r2tpool_alloc(session))
			return -ENOMEM;
		break;
	default:
		return iscsi_set_param(cls_conn, param, buf, buflen);
	}

	return 0;
}

static int
iscsi_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
			 enum iscsi_param param, char *buf)
1780
{
1781
	struct iscsi_conn *conn = cls_conn->dd_data;
1782
	int len;
1783 1784

	switch(param) {
1785
	case ISCSI_PARAM_CONN_PORT:
1786 1787 1788
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%hu\n", conn->portal_port);
		spin_unlock_bh(&conn->session->lock);
1789
		break;
1790
	case ISCSI_PARAM_CONN_ADDRESS:
1791 1792 1793
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%s\n", conn->portal_address);
		spin_unlock_bh(&conn->session->lock);
1794 1795
		break;
	default:
1796
		return iscsi_conn_get_param(cls_conn, param, buf);
1797 1798 1799 1800 1801
	}

	return len;
}

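/**
 * iscsi_conn_get_stats - fill in connection statistics
 * @cls_conn: iscsi connection
 * @stats: statistics to be filled in
 *
 * Copies the generic libiscsi PDU and octet counters and appends the
 * driver-specific custom counters (sendpage failures, discontiguous
 * headers and aborts).
 */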
static void
iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	stats->txdata_octets = conn->txdata_octets;
	stats->rxdata_octets = conn->rxdata_octets;
	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
	stats->dataout_pdus = conn->dataout_pdus_cnt;
	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
	stats->datain_pdus = conn->datain_pdus_cnt;
	stats->r2t_pdus = conn->r2t_pdus_cnt;
	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
	stats->custom_length = 3;
	strcpy(stats->custom[0].desc, "tx_sendpage_failures");
	stats->custom[0].value = tcp_conn->sendpage_failures_cnt;
	strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
	stats->custom[1].value = tcp_conn->discontiguous_hdr_cnt;
	strcpy(stats->custom[2].desc, "eh_abort_cnt");
	stats->custom[2].value = conn->eh_abort_cnt;
}

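/**
 * iscsi_tcp_session_create - create an iscsi tcp session
 * @ep: must be NULL for software iSCSI over TCP
 * @cmds_max: max number of commands
 * @qdepth: scsi device queue depth
 * @initial_cmdsn: initial CmdSN for the session
 * @hostno: set to the host number of the allocated scsi host
 *
 * Allocates and registers a scsi host, sets up the libiscsi session,
 * points every pre-allocated task at its TCP header storage and
 * finally allocates the R2T pools.
 */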
static struct iscsi_cls_session *
iscsi_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
			 uint16_t qdepth, uint32_t initial_cmdsn,
			 uint32_t *hostno)
{
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
	struct Scsi_Host *shost;
	int cmd_i;

	if (ep) {
		printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);
		return NULL;
	}

	shost = iscsi_host_alloc(&iscsi_sht, 0, qdepth);
	if (!shost)
		return NULL;
	shost->transportt = iscsi_tcp_scsi_transport;
	shost->max_lun = iscsi_max_lun;
	shost->max_id = 0;
	shost->max_channel = 0;
	shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;

	if (iscsi_host_add(shost, NULL))
		goto free_host;
	*hostno = shost->host_no;

	cls_session = iscsi_session_setup(&iscsi_tcp_transport, shost, cmds_max,
					  sizeof(struct iscsi_tcp_task),
					  initial_cmdsn, 0);
	if (!cls_session)
		goto remove_host;
	session = cls_session->dd_data;

	shost->can_queue = session->scsi_cmds_max;
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
		struct iscsi_task *task = session->cmds[cmd_i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;

		task->hdr = &tcp_task->hdr.cmd_hdr;
		task->hdr_max = sizeof(tcp_task->hdr) - ISCSI_DIGEST_SIZE;
	}

	if (iscsi_r2tpool_alloc(session))
		goto remove_session;
	return cls_session;

remove_session:
	iscsi_session_teardown(cls_session);
remove_host:
	iscsi_host_remove(shost);
free_host:
	iscsi_host_free(shost);
	return NULL;
}

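/**
 * iscsi_tcp_session_destroy - undo iscsi_tcp_session_create
 * @cls_session: iscsi session to destroy
 */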
static void iscsi_tcp_session_destroy(struct iscsi_cls_session *cls_session)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);

	iscsi_r2tpool_free(cls_session->dd_data);
	iscsi_session_teardown(cls_session);

	iscsi_host_remove(shost);
	iscsi_host_free(shost);
}

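/*
 * Software iSCSI moves data through the network stack, so the block
 * layer does not need bounce buffers or any particular buffer alignment
 * for this host.
 */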
static int iscsi_tcp_slave_configure(struct scsi_device *sdev)
{
	blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_ANY);
	blk_queue_dma_alignment(sdev->request_queue, 0);
	return 0;
}

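/*
 * SCSI host template: generic libiscsi entry points plus the
 * iscsi_tcp specific slave_configure hook above.
 */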
static struct scsi_host_template iscsi_sht = {
	.module			= THIS_MODULE,
	.name			= "iSCSI Initiator over TCP/IP",
	.queuecommand           = iscsi_queuecommand,
	.change_queue_depth	= iscsi_change_queue_depth,
	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
	.sg_tablesize		= 4096,
	.max_sectors		= 0xFFFF,
	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
	.eh_abort_handler       = iscsi_eh_abort,
	.eh_device_reset_handler= iscsi_eh_device_reset,
	.eh_host_reset_handler	= iscsi_eh_host_reset,
	.use_clustering         = DISABLE_CLUSTERING,
	.slave_configure        = iscsi_tcp_slave_configure,
	.proc_name		= "iscsi_tcp",
	.this_id		= -1,
};

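/*
 * Transport class registration: advertises the capabilities and
 * tunable parameters of this driver and wires the session, connection
 * and task callbacks into libiscsi.
 */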
static struct iscsi_transport iscsi_tcp_transport = {
	.owner			= THIS_MODULE,
	.name			= "tcp",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
				  | CAP_DATADGST,
	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
				  ISCSI_MAX_XMIT_DLENGTH |
				  ISCSI_HDRDGST_EN |
				  ISCSI_DATADGST_EN |
				  ISCSI_INITIAL_R2T_EN |
				  ISCSI_MAX_R2T |
				  ISCSI_IMM_DATA_EN |
				  ISCSI_FIRST_BURST |
				  ISCSI_MAX_BURST |
				  ISCSI_PDU_INORDER_EN |
				  ISCSI_DATASEQ_INORDER_EN |
				  ISCSI_ERL |
				  ISCSI_CONN_PORT |
				  ISCSI_CONN_ADDRESS |
				  ISCSI_EXP_STATSN |
				  ISCSI_PERSISTENT_PORT |
				  ISCSI_PERSISTENT_ADDRESS |
				  ISCSI_TARGET_NAME | ISCSI_TPGT |
				  ISCSI_USERNAME | ISCSI_PASSWORD |
				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
				  ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
				  ISCSI_LU_RESET_TMO |
				  ISCSI_PING_TMO | ISCSI_RECV_TMO |
				  ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
				  ISCSI_HOST_INITIATOR_NAME |
				  ISCSI_HOST_NETDEV_NAME,
	/* session management */
	.create_session		= iscsi_tcp_session_create,
	.destroy_session	= iscsi_tcp_session_destroy,
	/* connection management */
	.create_conn		= iscsi_tcp_conn_create,
	.bind_conn		= iscsi_tcp_conn_bind,
	.destroy_conn		= iscsi_tcp_conn_destroy,
	.set_param		= iscsi_conn_set_param,
	.get_conn_param		= iscsi_tcp_conn_get_param,
	.get_session_param	= iscsi_session_get_param,
	.start_conn		= iscsi_conn_start,
	.stop_conn		= iscsi_tcp_conn_stop,
	/* iscsi host params */
	.get_host_param		= iscsi_host_get_param,
	.set_host_param		= iscsi_host_set_param,
	/* IO */
	.send_pdu		= iscsi_conn_send_pdu,
	.get_stats		= iscsi_conn_get_stats,
	.init_task		= iscsi_tcp_task_init,
	.xmit_task		= iscsi_tcp_task_xmit,
	.cleanup_task		= iscsi_tcp_cleanup_task,
	/* recovery */
	.session_recovery_timedout = iscsi_session_recovery_timedout,
};

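/*
 * Module init: sanity-check max_lun and register the iSCSI/TCP
 * transport with the iSCSI transport class.
 */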
static int __init
iscsi_tcp_init(void)
{
	if (iscsi_max_lun < 1) {
		printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
		       iscsi_max_lun);
		return -EINVAL;
	}

	iscsi_tcp_scsi_transport = iscsi_register_transport(
							&iscsi_tcp_transport);
	if (!iscsi_tcp_scsi_transport)
		return -ENODEV;

	return 0;
}

static void __exit
iscsi_tcp_exit(void)
{
	iscsi_unregister_transport(&iscsi_tcp_transport);
}

module_init(iscsi_tcp_init);
module_exit(iscsi_tcp_exit);