iscsi_tcp.c 53.7 KB
Newer Older
1 2 3 4 5
/*
 * iSCSI Initiator over TCP/IP Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
 * Copyright (C) 2005 - 2006 Mike Christie
 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
 * maintained by open-iscsi@googlegroups.com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * See the file COPYING included with this distribution for more details.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 */

#include <linux/types.h>
#include <linux/list.h>
#include <linux/inet.h>
M
Mike Christie 已提交
32
#include <linux/file.h>
33 34 35 36 37 38 39
#include <linux/blkdev.h>
#include <linux/crypto.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
#include <net/tcp.h>
#include <scsi/scsi_cmnd.h>
40
#include <scsi/scsi_device.h>
41 42 43 44 45 46 47 48 49 50
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include "iscsi_tcp.h"

MODULE_AUTHOR("Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");
MODULE_DESCRIPTION("iSCSI/TCP data-path");
MODULE_LICENSE("GPL");
O
Olaf Kirch 已提交
51
#undef DEBUG_TCP
52 53 54
#define DEBUG_ASSERT

#ifdef DEBUG_TCP
55
#define debug_tcp(fmt...) printk(KERN_INFO "tcp: " fmt)
56 57 58 59 60 61 62 63 64 65 66
#else
#define debug_tcp(fmt...)
#endif

#ifndef DEBUG_ASSERT
#ifdef BUG_ON
#undef BUG_ON
#endif
#define BUG_ON(expr)
#endif

67 68 69 70
static struct scsi_transport_template *iscsi_tcp_scsi_transport;
static struct scsi_host_template iscsi_sht;
static struct iscsi_transport iscsi_tcp_transport;

71 72 73
static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

O
Olaf Kirch 已提交
74
static int iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
75
				   struct iscsi_segment *segment);
76

O
Olaf Kirch 已提交
77
/*
 * Scatterlist handling: inside the iscsi_segment, we
 * remember an index into the scatterlist, and set data/size
 * to the current scatterlist entry. For highmem pages, we
 * kmap as needed.
 *
 * Note that the page is unmapped when we return from
 * TCP's data_ready handler, so we may end up mapping and
 * unmapping the same page repeatedly. The whole reason
 * for this is that we shouldn't keep the page mapped
 * outside the softirq.
 */

/**
91 92 93
 * iscsi_tcp_segment_init_sg - init indicated scatterlist entry
 * @segment: the buffer object
 * @sg: scatterlist
O
Olaf Kirch 已提交
94 95
 * @offset: byte offset into that sg entry
 *
96
 * This function sets up the segment so that subsequent
O
Olaf Kirch 已提交
97 98 99 100
 * data is copied to the indicated sg entry, at the given
 * offset.
 */
static inline void
101 102
iscsi_tcp_segment_init_sg(struct iscsi_segment *segment,
			  struct scatterlist *sg, unsigned int offset)
O
Olaf Kirch 已提交
103
{
104 105 106 107 108
	segment->sg = sg;
	segment->sg_offset = offset;
	segment->size = min(sg->length - offset,
			    segment->total_size - segment->total_copied);
	segment->data = NULL;
O
Olaf Kirch 已提交
109 110 111
}

/**
112 113 114
 * iscsi_tcp_segment_map - map the current S/G page
 * @segment: iscsi_segment
 * @recv: 1 if called from recv path
O
Olaf Kirch 已提交
115 116 117 118 119 120
 *
 * We only need to possibly kmap data if scatter lists are being used,
 * because the iscsi passthrough and internal IO paths will never use high
 * mem pages.
 */
static inline void
121
iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv)
O
Olaf Kirch 已提交
122 123 124
{
	struct scatterlist *sg;

125
	if (segment->data != NULL || !segment->sg)
O
Olaf Kirch 已提交
126 127
		return;

128 129
	sg = segment->sg;
	BUG_ON(segment->sg_mapped);
O
Olaf Kirch 已提交
130
	BUG_ON(sg->length == 0);
131 132 133 134 135 136 137 138 139 140 141 142 143 144

	/*
	 * If the page count is greater than one it is ok to send
	 * to the network layer's zero copy send path. If not we
	 * have to go the slow sendmsg path. We always map for the
	 * recv path.
	 */
	if (page_count(sg_page(sg)) >= 1 && !recv)
		return;

	debug_tcp("iscsi_tcp_segment_map %s %p\n", recv ? "recv" : "xmit",
		  segment);
	segment->sg_mapped = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
	segment->data = segment->sg_mapped + sg->offset + segment->sg_offset;
O
Olaf Kirch 已提交
145 146 147
}

static inline void
148
iscsi_tcp_segment_unmap(struct iscsi_segment *segment)
O
Olaf Kirch 已提交
149
{
150 151 152 153 154 155 156
	debug_tcp("iscsi_tcp_segment_unmap %p\n", segment);

	if (segment->sg_mapped) {
		debug_tcp("iscsi_tcp_segment_unmap valid\n");
		kunmap_atomic(segment->sg_mapped, KM_SOFTIRQ0);
		segment->sg_mapped = NULL;
		segment->data = NULL;
O
Olaf Kirch 已提交
157 158 159 160 161 162 163
	}
}

/*
 * Splice the digest buffer into the buffer
 */
static inline void
164
iscsi_tcp_segment_splice_digest(struct iscsi_segment *segment, void *digest)
O
Olaf Kirch 已提交
165
{
166 167 168 169 170 171 172
	segment->data = digest;
	segment->digest_len = ISCSI_DIGEST_SIZE;
	segment->total_size += ISCSI_DIGEST_SIZE;
	segment->size = ISCSI_DIGEST_SIZE;
	segment->copied = 0;
	segment->sg = NULL;
	segment->hash = NULL;
O
Olaf Kirch 已提交
173 174 175
}

/**
176 177 178 179
 * iscsi_tcp_segment_done - check whether the segment is complete
 * @segment: iscsi segment to check
 * @recv: set to one of this is called from the recv path
 * @copied: number of bytes copied
O
Olaf Kirch 已提交
180
 *
181
 * Check if we're done receiving this segment. If the receive
O
Olaf Kirch 已提交
182 183 184 185 186 187 188 189
 * buffer is full but we expect more data, move on to the
 * next entry in the scatterlist.
 *
 * If the amount of data we received isn't a multiple of 4,
 * we will transparently receive the pad bytes, too.
 *
 * This function must be re-entrant.
 */
190
static inline int
191
iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv, unsigned copied)
192
{
O
Olaf Kirch 已提交
193
	static unsigned char padbuf[ISCSI_PAD_LEN];
194
	struct scatterlist sg;
195
	unsigned int pad;
O
Olaf Kirch 已提交
196

197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
	debug_tcp("copied %u %u size %u %s\n", segment->copied, copied,
		  segment->size, recv ? "recv" : "xmit");
	if (segment->hash && copied) {
		/*
		 * If a segment is kmapd we must unmap it before sending
		 * to the crypto layer since that will try to kmap it again.
		 */
		iscsi_tcp_segment_unmap(segment);

		if (!segment->data) {
			sg_init_table(&sg, 1);
			sg_set_page(&sg, sg_page(segment->sg), copied,
				    segment->copied + segment->sg_offset +
							segment->sg->offset);
		} else
			sg_init_one(&sg, segment->data + segment->copied,
				    copied);
		crypto_hash_update(segment->hash, &sg, copied);
	}

	segment->copied += copied;
	if (segment->copied < segment->size) {
		iscsi_tcp_segment_map(segment, recv);
O
Olaf Kirch 已提交
220 221
		return 0;
	}
222

223 224 225
	segment->total_copied += segment->copied;
	segment->copied = 0;
	segment->size = 0;
226

O
Olaf Kirch 已提交
227
	/* Unmap the current scatterlist page, if there is one. */
228
	iscsi_tcp_segment_unmap(segment);
O
Olaf Kirch 已提交
229 230

	/* Do we have more scatterlist entries? */
231 232 233
	debug_tcp("total copied %u total size %u\n", segment->total_copied,
		   segment->total_size);
	if (segment->total_copied < segment->total_size) {
O
Olaf Kirch 已提交
234
		/* Proceed to the next entry in the scatterlist. */
235 236 237 238
		iscsi_tcp_segment_init_sg(segment, sg_next(segment->sg),
					  0);
		iscsi_tcp_segment_map(segment, recv);
		BUG_ON(segment->size == 0);
O
Olaf Kirch 已提交
239 240 241 242
		return 0;
	}

	/* Do we need to handle padding? */
243
	pad = iscsi_padding(segment->total_copied);
244
	if (pad != 0) {
O
Olaf Kirch 已提交
245
		debug_tcp("consume %d pad bytes\n", pad);
246 247 248
		segment->total_size += pad;
		segment->size = pad;
		segment->data = padbuf;
O
Olaf Kirch 已提交
249 250 251 252
		return 0;
	}

	/*
253
	 * Set us up for transferring the data digest. hdr digest
O
Olaf Kirch 已提交
254 255
	 * is completely handled in hdr done function.
	 */
256 257 258 259 260
	if (segment->hash) {
		crypto_hash_final(segment->hash, segment->digest);
		iscsi_tcp_segment_splice_digest(segment,
				 recv ? segment->recv_digest : segment->digest);
		return 0;
O
Olaf Kirch 已提交
261
	}
262

O
Olaf Kirch 已提交
263 264
	return 1;
}
265

O
Olaf Kirch 已提交
266
/**
 * iscsi_tcp_xmit_segment - transmit segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to transmit
 *
 * This function transmits as much of the buffer as
 * the network layer will accept, and returns the number of
 * bytes transmitted. If the very first send fails with an error
 * other than -EAGAIN, that (negative) error is returned instead.
 *
 * If CRC hashing is enabled, the function will compute the
 * hash as it goes. When the entire segment has been transmitted,
 * it will retrieve the hash value and send it as well.
 */
static int
iscsi_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment)
{
	struct socket *sock = tcp_conn->sock;
	unsigned int sent_total = 0;
	int rc = 0;

	/* Each pass first accounts for what the previous pass sent. */
	while (!iscsi_tcp_segment_done(segment, 0, rc)) {
		unsigned int offset, copy;
		int flags = 0;

		rc = 0;
		offset = segment->copied;
		copy = segment->size - offset;

		/* More pieces of this segment will follow this one. */
		if (segment->total_copied + segment->size < segment->total_size)
			flags |= MSG_MORE;

		/* Use sendpage if we can; else fall back to sendmsg */
		if (segment->data) {
			struct msghdr msg = { .msg_flags = flags };
			struct kvec iov = {
				.iov_base = segment->data + offset,
				.iov_len = copy
			};

			rc = kernel_sendmsg(sock, &msg, &iov, 1, copy);
		} else {
			struct scatterlist *sg = segment->sg;

			offset += segment->sg_offset + sg->offset;
			rc = tcp_conn->sendpage(sock, sg_page(sg), offset,
						copy, flags);
		}

		if (rc < 0) {
			iscsi_tcp_segment_unmap(segment);
			/* Report the error only if nothing was sent at all. */
			if (!sent_total && rc != -EAGAIN)
				return rc;
			break;
		}
		sent_total += rc;
	}
	return sent_total;
}

/**
 * iscsi_tcp_segment_recv - copy data to segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to copy to
O
Olaf Kirch 已提交
330 331 332 333 334 335 336 337 338 339 340 341 342 343
 * @ptr: data pointer
 * @len: amount of data available
 *
 * This function copies up to @len bytes to the
 * given buffer, and returns the number of bytes
 * consumed, which can actually be less than @len.
 *
 * If hash digest is enabled, the function will update the
 * hash while copying.
 * Combining these two operations doesn't buy us a lot (yet),
 * but in the future we could implement combined copy+crc,
 * just way we do for network layer checksums.
 */
static int
344 345 346
iscsi_tcp_segment_recv(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment, const void *ptr,
		       unsigned int len)
O
Olaf Kirch 已提交
347
{
348
	unsigned int copy = 0, copied = 0;
349

350 351 352 353 354
	while (!iscsi_tcp_segment_done(segment, 1, copy)) {
		if (copied == len) {
			debug_tcp("iscsi_tcp_segment_recv copied %d bytes\n",
				  len);
			break;
O
Olaf Kirch 已提交
355
		}
356 357 358 359

		copy = min(len - copied, segment->size - segment->copied);
		debug_tcp("iscsi_tcp_segment_recv copying %d\n", copy);
		memcpy(segment->data + segment->copied, ptr + copied, copy);
O
Olaf Kirch 已提交
360 361 362 363 364 365 366 367 368 369
		copied += copy;
	}
	return copied;
}

/*
 * Compute the digest of the @hdrlen bytes at @hdr using @hash and
 * store the result in @digest.
 */
static inline void
iscsi_tcp_dgst_header(struct hash_desc *hash, const void *hdr, size_t hdrlen,
		      unsigned char digest[ISCSI_DIGEST_SIZE])
{
	struct scatterlist hdr_sg;

	/* The header is one linear buffer, so a single sg entry suffices. */
	sg_init_one(&hdr_sg, hdr, hdrlen);
	crypto_hash_digest(hash, &hdr_sg, hdrlen, digest);
}

static inline int
iscsi_tcp_dgst_verify(struct iscsi_tcp_conn *tcp_conn,
377
		      struct iscsi_segment *segment)
O
Olaf Kirch 已提交
378
{
379
	if (!segment->digest_len)
O
Olaf Kirch 已提交
380 381
		return 1;

382 383
	if (memcmp(segment->recv_digest, segment->digest,
		   segment->digest_len)) {
O
Olaf Kirch 已提交
384 385 386 387 388 389 390 391
		debug_scsi("digest mismatch\n");
		return 0;
	}

	return 1;
}

/*
392
 * Helper function to set up segment buffer
O
Olaf Kirch 已提交
393 394
 */
static inline void
395 396
__iscsi_segment_init(struct iscsi_segment *segment, size_t size,
		     iscsi_segment_done_fn_t *done, struct hash_desc *hash)
O
Olaf Kirch 已提交
397
{
398 399 400
	memset(segment, 0, sizeof(*segment));
	segment->total_size = size;
	segment->done = done;
O
Olaf Kirch 已提交
401 402

	if (hash) {
403
		segment->hash = hash;
O
Olaf Kirch 已提交
404 405 406 407 408
		crypto_hash_init(hash);
	}
}

static inline void
409 410 411
iscsi_segment_init_linear(struct iscsi_segment *segment, void *data,
			  size_t size, iscsi_segment_done_fn_t *done,
			  struct hash_desc *hash)
O
Olaf Kirch 已提交
412
{
413 414 415
	__iscsi_segment_init(segment, size, done, hash);
	segment->data = data;
	segment->size = size;
O
Olaf Kirch 已提交
416 417 418
}

static inline int
419 420 421 422
iscsi_segment_seek_sg(struct iscsi_segment *segment,
		      struct scatterlist *sg_list, unsigned int sg_count,
		      unsigned int offset, size_t size,
		      iscsi_segment_done_fn_t *done, struct hash_desc *hash)
O
Olaf Kirch 已提交
423
{
424
	struct scatterlist *sg;
O
Olaf Kirch 已提交
425 426
	unsigned int i;

427 428 429 430 431 432 433 434
	debug_scsi("iscsi_segment_seek_sg offset %u size %llu\n",
		  offset, size);
	__iscsi_segment_init(segment, size, done, hash);
	for_each_sg(sg_list, sg, sg_count, i) {
		debug_scsi("sg %d, len %u offset %u\n", i, sg->length,
			   sg->offset);
		if (offset < sg->length) {
			iscsi_tcp_segment_init_sg(segment, sg, offset);
O
Olaf Kirch 已提交
435
			return 0;
436
		}
437
		offset -= sg->length;
438 439
	}

O
Olaf Kirch 已提交
440 441 442 443
	return ISCSI_ERR_DATA_OFFSET;
}

/**
444
 * iscsi_tcp_hdr_recv_prep - prep segment for hdr reception
O
Olaf Kirch 已提交
445 446 447 448 449 450 451 452 453 454 455
 * @tcp_conn: iscsi connection to prep for
 *
 * This function always passes NULL for the hash argument, because when this
 * function is called we do not yet know the final size of the header and want
 * to delay the digest processing until we know that.
 */
static void
iscsi_tcp_hdr_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	debug_tcp("iscsi_tcp_hdr_recv_prep(%p%s)\n", tcp_conn,
		  tcp_conn->iscsi_conn->hdrdgst_en ? ", digest enabled" : "");
456
	iscsi_segment_init_linear(&tcp_conn->in.segment,
O
Olaf Kirch 已提交
457 458 459 460 461 462 463 464 465
				tcp_conn->in.hdr_buf, sizeof(struct iscsi_hdr),
				iscsi_tcp_hdr_recv_done, NULL);
}

/*
 * Handle incoming reply to any other type of command
 */
static int
iscsi_tcp_data_recv_done(struct iscsi_tcp_conn *tcp_conn,
466
			 struct iscsi_segment *segment)
O
Olaf Kirch 已提交
467 468 469 470
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	int rc = 0;

471
	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
O
Olaf Kirch 已提交
472 473 474 475 476 477 478 479
		return ISCSI_ERR_DATA_DGST;

	rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr,
			conn->data, tcp_conn->in.datalen);
	if (rc)
		return rc;

	iscsi_tcp_hdr_recv_prep(tcp_conn);
480 481 482
	return 0;
}

O
Olaf Kirch 已提交
483 484 485 486 487 488 489 490 491
static void
iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct hash_desc *rx_hash = NULL;

	if (conn->datadgst_en)
		rx_hash = &tcp_conn->rx_hash;

492
	iscsi_segment_init_linear(&tcp_conn->in.segment,
O
Olaf Kirch 已提交
493 494 495 496
				conn->data, tcp_conn->in.datalen,
				iscsi_tcp_data_recv_done, rx_hash);
}

M
Mike Christie 已提交
497 498 499 500
/*
 * must be called with session lock
 */
static void
501
iscsi_tcp_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
502
{
503
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
504
	struct iscsi_r2t_info *r2t;
505

506 507 508 509 510 511 512
	/* flush ctask's r2t queues */
	while (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*))) {
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		debug_scsi("iscsi_tcp_cleanup_ctask pending r2t dropped\n");
	}

513 514 515 516 517 518
	r2t = tcp_ctask->r2t;
	if (r2t != NULL) {
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		tcp_ctask->r2t = NULL;
	}
519 520 521 522 523 524 525 526 527 528
}

/**
 * iscsi_data_rsp - SCSI Data-In Response processing
 * @conn: iscsi connection
 * @ctask: scsi command task
 **/
static int
iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
{
529 530 531
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
532
	struct iscsi_session *session = conn->session;
533
	struct scsi_cmnd *sc = ctask->sc;
534
	int datasn = be32_to_cpu(rhdr->datasn);
535
	unsigned total_in_length = scsi_in(sc)->length;
536

537
	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
538
	if (tcp_conn->in.datalen == 0)
539 540
		return 0;

541 542 543
	if (tcp_ctask->exp_datasn != datasn) {
		debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->datasn(%d)\n",
		          __FUNCTION__, tcp_ctask->exp_datasn, datasn);
544
		return ISCSI_ERR_DATASN;
545
	}
546

547
	tcp_ctask->exp_datasn++;
548

549
	tcp_ctask->data_offset = be32_to_cpu(rhdr->offset);
550
	if (tcp_ctask->data_offset + tcp_conn->in.datalen > total_in_length) {
551 552
		debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n",
		          __FUNCTION__, tcp_ctask->data_offset,
553
		          tcp_conn->in.datalen, total_in_length);
554
		return ISCSI_ERR_DATA_OFFSET;
555
	}
556 557

	if (rhdr->flags & ISCSI_FLAG_DATA_STATUS) {
558
		sc->result = (DID_OK << 16) | rhdr->cmd_status;
559
		conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
560 561
		if (rhdr->flags & (ISCSI_FLAG_DATA_UNDERFLOW |
		                   ISCSI_FLAG_DATA_OVERFLOW)) {
562 563 564
			int res_count = be32_to_cpu(rhdr->residual_count);

			if (res_count > 0 &&
565
			    (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW ||
566 567
			     res_count <= total_in_length))
				scsi_in(sc)->resid = res_count;
568
			else
569 570
				sc->result = (DID_BAD_TARGET << 16) |
					rhdr->cmd_status;
571
		}
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595
	}

	conn->datain_pdus_cnt++;
	return 0;
}

/**
 * iscsi_solicit_data_init - initialize first Data-Out
 * @conn: iscsi connection
 * @ctask: scsi command task
 * @r2t: R2T info
 *
 * Notes:
 *	Initialize first Data-Out within this R2T sequence and finds
 *	proper data_offset within this SCSI command.
 *
 *	This function is called with connection lock taken.
 **/
static void
iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
			struct iscsi_r2t_info *r2t)
{
	struct iscsi_data *hdr;

596
	hdr = &r2t->dtask.hdr;
597 598 599 600 601
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
602 603
	memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
	hdr->itt = ctask->hdr->itt;
604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629
	hdr->exp_statsn = r2t->exp_statsn;
	hdr->offset = cpu_to_be32(r2t->data_offset);
	if (r2t->data_length > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
		hdr->flags = 0;
	} else {
		hton24(hdr->dlength, r2t->data_length);
		r2t->data_count = r2t->data_length;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}
	conn->dataout_pdus_cnt++;

	r2t->sent = 0;
}

/**
 * iscsi_r2t_rsp - iSCSI R2T Response processing
 * @conn: iscsi connection
 * @ctask: scsi command task
 **/
static int
iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
{
	struct iscsi_r2t_info *r2t;
	struct iscsi_session *session = conn->session;
630 631 632
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
633 634 635
	int r2tsn = be32_to_cpu(rhdr->r2tsn);
	int rc;

636
	if (tcp_conn->in.datalen) {
637 638 639
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2t with datalen %d\n",
				  tcp_conn->in.datalen);
640
		return ISCSI_ERR_DATALEN;
641
	}
642

643 644 645
	if (tcp_ctask->exp_datasn != r2tsn){
		debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
		          __FUNCTION__, tcp_ctask->exp_datasn, r2tsn);
646
		return ISCSI_ERR_R2TSN;
647
	}
648 649

	/* fill-in new R2T associated with the task */
650 651
	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);

652
	if (!ctask->sc || session->state != ISCSI_STATE_LOGGED_IN) {
653 654 655
		iscsi_conn_printk(KERN_INFO, conn,
				  "dropping R2T itt %d in recovery.\n",
				  ctask->itt);
656 657
		return 0;
	}
658

659
	rc = __kfifo_get(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
660 661 662 663
	BUG_ON(!rc);

	r2t->exp_statsn = rhdr->statsn;
	r2t->data_length = be32_to_cpu(rhdr->data_length);
664
	if (r2t->data_length == 0) {
665 666
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with zero data len\n");
667 668
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
669 670 671
		return ISCSI_ERR_DATALEN;
	}

672 673 674 675 676
	if (r2t->data_length > session->max_burst)
		debug_scsi("invalid R2T with data len %u and max burst %u."
			   "Attempting to execute request.\n",
			    r2t->data_length, session->max_burst);

677
	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
678
	if (r2t->data_offset + r2t->data_length > scsi_out(ctask->sc)->length) {
679 680 681
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with data len %u at offset %u "
				  "and total length %d\n", r2t->data_length,
682
				  r2t->data_offset, scsi_out(ctask->sc)->length);
683 684
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
685 686 687 688 689 690 691 692
		return ISCSI_ERR_DATALEN;
	}

	r2t->ttt = rhdr->ttt; /* no flip */
	r2t->solicit_datasn = 0;

	iscsi_solicit_data_init(conn, ctask, r2t);

693
	tcp_ctask->exp_datasn = r2tsn + 1;
694
	__kfifo_put(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*));
695
	conn->r2t_pdus_cnt++;
696 697

	iscsi_requeue_ctask(ctask);
698 699 700
	return 0;
}

O
Olaf Kirch 已提交
701 702 703 704 705
/*
 * Handle incoming reply to DataIn command
 */
static int
iscsi_tcp_process_data_in(struct iscsi_tcp_conn *tcp_conn,
706
			  struct iscsi_segment *segment)
O
Olaf Kirch 已提交
707 708 709 710 711
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr = tcp_conn->in.hdr;
	int rc;

712
	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
O
Olaf Kirch 已提交
713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735
		return ISCSI_ERR_DATA_DGST;

	/* check for non-exceptional status */
	if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
		rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
		if (rc)
			return rc;
	}

	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

/**
 * iscsi_tcp_hdr_dissect - process PDU header
 * @conn: iSCSI connection
 * @hdr: PDU header
 *
 * This function analyzes the header of the PDU received,
 * and performs several sanity checks. If the PDU is accompanied
 * by data, the receive buffer is set up to copy the incoming data
 * to the correct location.
 */
736
static int
O
Olaf Kirch 已提交
737
iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
738
{
739
	int rc = 0, opcode, ahslen;
740
	struct iscsi_session *session = conn->session;
741
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
O
Olaf Kirch 已提交
742
	struct iscsi_cmd_task *ctask;
743 744

	/* verify PDU length */
745 746
	tcp_conn->in.datalen = ntoh24(hdr->dlength);
	if (tcp_conn->in.datalen > conn->max_recv_dlength) {
747 748 749
		iscsi_conn_printk(KERN_ERR, conn,
				  "iscsi_tcp: datalen %d > %d\n",
				  tcp_conn->in.datalen, conn->max_recv_dlength);
750 751 752
		return ISCSI_ERR_DATALEN;
	}

O
Olaf Kirch 已提交
753 754 755
	/* Additional header segments. So far, we don't
	 * process additional headers.
	 */
756
	ahslen = hdr->hlength << 2;
757

758
	opcode = hdr->opcode & ISCSI_OPCODE_MASK;
759
	/* verify itt (itt encoding: age+cid+itt) */
760
	rc = iscsi_verify_itt(conn, hdr->itt);
761
	if (rc)
762
		return rc;
763

O
Olaf Kirch 已提交
764 765
	debug_tcp("opcode 0x%x ahslen %d datalen %d\n",
		  opcode, ahslen, tcp_conn->in.datalen);
766

767 768
	switch(opcode) {
	case ISCSI_OP_SCSI_DATA_IN:
769 770 771 772
		ctask = iscsi_itt_to_ctask(conn, hdr->itt);
		if (!ctask)
			return ISCSI_ERR_BAD_ITT;

773
		spin_lock(&conn->session->lock);
O
Olaf Kirch 已提交
774
		rc = iscsi_data_rsp(conn, ctask);
775
		spin_unlock(&conn->session->lock);
776 777
		if (rc)
			return rc;
O
Olaf Kirch 已提交
778 779 780
		if (tcp_conn->in.datalen) {
			struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
			struct hash_desc *rx_hash = NULL;
781
			struct scsi_data_buffer *sdb = scsi_in(ctask->sc);
O
Olaf Kirch 已提交
782 783 784 785

			/*
			 * Setup copy of Data-In into the Scsi_Cmnd
			 * Scatterlist case:
786
			 * We set up the iscsi_segment to point to the next
O
Olaf Kirch 已提交
787 788 789 790 791 792 793 794 795 796 797
			 * scatterlist entry to copy to. As we go along,
			 * we move on to the next scatterlist entry and
			 * update the digest per-entry.
			 */
			if (conn->datadgst_en)
				rx_hash = &tcp_conn->rx_hash;

			debug_tcp("iscsi_tcp_begin_data_in(%p, offset=%d, "
				  "datalen=%d)\n", tcp_conn,
				  tcp_ctask->data_offset,
				  tcp_conn->in.datalen);
798
			return iscsi_segment_seek_sg(&tcp_conn->in.segment,
799 800
						     sdb->table.sgl,
						     sdb->table.nents,
801 802 803 804
						     tcp_ctask->data_offset,
						     tcp_conn->in.datalen,
						     iscsi_tcp_process_data_in,
						     rx_hash);
O
Olaf Kirch 已提交
805
		}
806 807
		/* fall through */
	case ISCSI_OP_SCSI_CMD_RSP:
O
Olaf Kirch 已提交
808 809 810 811 812
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
813 814
		break;
	case ISCSI_OP_R2T:
815 816 817 818
		ctask = iscsi_itt_to_ctask(conn, hdr->itt);
		if (!ctask)
			return ISCSI_ERR_BAD_ITT;

819 820
		if (ahslen)
			rc = ISCSI_ERR_AHSLEN;
821 822
		else if (ctask->sc->sc_data_direction == DMA_TO_DEVICE) {
			spin_lock(&session->lock);
O
Olaf Kirch 已提交
823
			rc = iscsi_r2t_rsp(conn, ctask);
824 825
			spin_unlock(&session->lock);
		} else
826 827 828 829 830 831
			rc = ISCSI_ERR_PROTO;
		break;
	case ISCSI_OP_LOGIN_RSP:
	case ISCSI_OP_TEXT_RSP:
	case ISCSI_OP_REJECT:
	case ISCSI_OP_ASYNC_EVENT:
832 833 834 835 836
		/*
		 * It is possible that we could get a PDU with a buffer larger
		 * than 8K, but there are no targets that currently do this.
		 * For now we fail until we find a vendor that needs it
		 */
O
Olaf Kirch 已提交
837
		if (ISCSI_DEF_MAX_RECV_SEG_LEN < tcp_conn->in.datalen) {
838 839 840 841 842 843
			iscsi_conn_printk(KERN_ERR, conn,
					  "iscsi_tcp: received buffer of "
					  "len %u but conn buffer is only %u "
					  "(opcode %0x)\n",
					  tcp_conn->in.datalen,
					  ISCSI_DEF_MAX_RECV_SEG_LEN, opcode);
844 845 846 847
			rc = ISCSI_ERR_PROTO;
			break;
		}

O
Olaf Kirch 已提交
848 849 850 851 852 853 854
		/* If there's data coming in with the response,
		 * receive it to the connection's buffer.
		 */
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
855
	/* fall through */
856 857
	case ISCSI_OP_LOGOUT_RSP:
	case ISCSI_OP_NOOP_IN:
858 859 860 861 862 863 864
	case ISCSI_OP_SCSI_TMFUNC_RSP:
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
		break;
	default:
		rc = ISCSI_ERR_BAD_OPCODE;
		break;
	}
865

O
Olaf Kirch 已提交
866 867 868 869 870 871
	if (rc == 0) {
		/* Anything that comes with data should have
		 * been handled above. */
		if (tcp_conn->in.datalen)
			return ISCSI_ERR_PROTO;
		iscsi_tcp_hdr_recv_prep(tcp_conn);
872 873
	}

O
Olaf Kirch 已提交
874
	return rc;
875 876
}

O
Olaf Kirch 已提交
877 878 879 880 881 882 883 884 885
/**
 * iscsi_tcp_hdr_recv_done - process PDU header
 *
 * This is the callback invoked when the PDU header has
 * been received. If the header is followed by additional
 * header segments, we go back for more data.
 */
static int
iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
886
			struct iscsi_segment *segment)
887
{
O
Olaf Kirch 已提交
888 889
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr;
890

O
Olaf Kirch 已提交
891 892 893 894 895
	/* Check if there are additional header segments
	 * *prior* to computing the digest, because we
	 * may need to go back to the caller for more.
	 */
	hdr = (struct iscsi_hdr *) tcp_conn->in.hdr_buf;
896
	if (segment->copied == sizeof(struct iscsi_hdr) && hdr->hlength) {
O
Olaf Kirch 已提交
897 898 899 900 901 902 903 904 905
		/* Bump the header length - the caller will
		 * just loop around and get the AHS for us, and
		 * call again. */
		unsigned int ahslen = hdr->hlength << 2;

		/* Make sure we don't overflow */
		if (sizeof(*hdr) + ahslen > sizeof(tcp_conn->in.hdr_buf))
			return ISCSI_ERR_AHSLEN;

906 907
		segment->total_size += ahslen;
		segment->size += ahslen;
O
Olaf Kirch 已提交
908
		return 0;
909 910
	}

O
Olaf Kirch 已提交
911 912 913 914
	/* We're done processing the header. See if we're doing
	 * header digests; if so, set up the recv_digest buffer
	 * and go back for more. */
	if (conn->hdrdgst_en) {
915 916 917
		if (segment->digest_len == 0) {
			iscsi_tcp_segment_splice_digest(segment,
							segment->recv_digest);
O
Olaf Kirch 已提交
918
			return 0;
919
		}
O
Olaf Kirch 已提交
920
		iscsi_tcp_dgst_header(&tcp_conn->rx_hash, hdr,
921 922
				      segment->total_copied - ISCSI_DIGEST_SIZE,
				      segment->digest);
923

924
		if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
O
Olaf Kirch 已提交
925
			return ISCSI_ERR_HDR_DGST;
926
	}
O
Olaf Kirch 已提交
927 928 929

	tcp_conn->in.hdr = hdr;
	return iscsi_tcp_hdr_dissect(conn, hdr);
930 931 932
}

/**
O
Olaf Kirch 已提交
933
 * iscsi_tcp_recv - TCP receive in sendfile fashion
934 935 936 937 938 939
 * @rd_desc: read descriptor
 * @skb: socket buffer
 * @offset: offset in skb
 * @len: skb->len - offset
 **/
static int
O
Olaf Kirch 已提交
940 941
iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
	       unsigned int offset, size_t len)
942 943
{
	struct iscsi_conn *conn = rd_desc->arg.data;
944
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
945
	struct iscsi_segment *segment = &tcp_conn->in.segment;
O
Olaf Kirch 已提交
946 947 948
	struct skb_seq_state seq;
	unsigned int consumed = 0;
	int rc = 0;
949

O
Olaf Kirch 已提交
950
	debug_tcp("in %d bytes\n", skb->len - offset);
951 952 953 954 955 956

	if (unlikely(conn->suspend_rx)) {
		debug_tcp("conn %d Rx suspended!\n", conn->id);
		return 0;
	}

O
Olaf Kirch 已提交
957 958 959 960
	skb_prepare_seq_read(skb, offset, skb->len, &seq);
	while (1) {
		unsigned int avail;
		const u8 *ptr;
961

O
Olaf Kirch 已提交
962
		avail = skb_seq_read(consumed, &ptr, &seq);
963 964 965
		if (avail == 0) {
			debug_tcp("no more data avail. Consumed %d\n",
				  consumed);
O
Olaf Kirch 已提交
966
			break;
967 968
		}
		BUG_ON(segment->copied >= segment->size);
O
Olaf Kirch 已提交
969 970

		debug_tcp("skb %p ptr=%p avail=%u\n", skb, ptr, avail);
971
		rc = iscsi_tcp_segment_recv(tcp_conn, segment, ptr, avail);
O
Olaf Kirch 已提交
972 973 974
		BUG_ON(rc == 0);
		consumed += rc;

975 976 977
		if (segment->total_copied >= segment->total_size) {
			debug_tcp("segment done\n");
			rc = segment->done(tcp_conn, segment);
O
Olaf Kirch 已提交
978 979 980
			if (rc != 0) {
				skb_abort_seq_read(&seq);
				goto error;
981
			}
982

O
Olaf Kirch 已提交
983
			/* The done() functions sets up the
984
			 * next segment. */
985 986
		}
	}
987
	skb_abort_seq_read(&seq);
O
Olaf Kirch 已提交
988 989
	conn->rxdata_octets += consumed;
	return consumed;
990

O
Olaf Kirch 已提交
991 992 993 994
error:
	debug_tcp("Error receiving PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	return 0;
995 996 997 998 999 1000
}

static void
iscsi_tcp_data_ready(struct sock *sk, int flag)
{
	struct iscsi_conn *conn = sk->sk_user_data;
O
Olaf Kirch 已提交
1001
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1002 1003 1004 1005
	read_descriptor_t rd_desc;

	read_lock(&sk->sk_callback_lock);

1006
	/*
O
Olaf Kirch 已提交
1007
	 * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
1008
	 * We set count to 1 because we want the network layer to
O
Olaf Kirch 已提交
1009
	 * hand us all the skbs that are available. iscsi_tcp_recv
1010 1011
	 * handled pdus that cross buffers or pdus that still need data.
	 */
1012
	rd_desc.arg.data = conn;
1013
	rd_desc.count = 1;
O
Olaf Kirch 已提交
1014
	tcp_read_sock(sk, &rd_desc, iscsi_tcp_recv);
1015 1016

	read_unlock(&sk->sk_callback_lock);
O
Olaf Kirch 已提交
1017 1018 1019

	/* If we had to (atomically) map a highmem page,
	 * unmap it now. */
1020
	iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
1021 1022 1023 1024 1025
}

static void
iscsi_tcp_state_change(struct sock *sk)
{
1026
	struct iscsi_tcp_conn *tcp_conn;
1027 1028 1029 1030 1031 1032 1033 1034 1035
	struct iscsi_conn *conn;
	struct iscsi_session *session;
	void (*old_state_change)(struct sock *);

	read_lock(&sk->sk_callback_lock);

	conn = (struct iscsi_conn*)sk->sk_user_data;
	session = conn->session;

M
Mike Christie 已提交
1036 1037 1038
	if ((sk->sk_state == TCP_CLOSE_WAIT ||
	     sk->sk_state == TCP_CLOSE) &&
	    !atomic_read(&sk->sk_rmem_alloc)) {
1039 1040 1041 1042
		debug_tcp("iscsi_tcp_state_change: TCP_CLOSE|TCP_CLOSE_WAIT\n");
		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	}

1043 1044
	tcp_conn = conn->dd_data;
	old_state_change = tcp_conn->old_state_change;
1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058

	read_unlock(&sk->sk_callback_lock);

	old_state_change(sk);
}

/**
 * iscsi_write_space - Called when more output buffer space is available
 * @sk: socket space is available for
 **/
static void
iscsi_write_space(struct sock *sk)
{
	struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
1059 1060 1061
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	tcp_conn->old_write_space(sk);
1062
	debug_tcp("iscsi_write_space: cid %d\n", conn->id);
1063
	scsi_queue_work(conn->session->host, &conn->xmitwork);
1064 1065 1066 1067 1068
}

static void
iscsi_conn_set_callbacks(struct iscsi_conn *conn)
{
1069 1070
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct sock *sk = tcp_conn->sock->sk;
1071 1072 1073 1074

	/* assign new callbacks */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = conn;
1075 1076 1077
	tcp_conn->old_data_ready = sk->sk_data_ready;
	tcp_conn->old_state_change = sk->sk_state_change;
	tcp_conn->old_write_space = sk->sk_write_space;
1078 1079 1080 1081 1082 1083 1084
	sk->sk_data_ready = iscsi_tcp_data_ready;
	sk->sk_state_change = iscsi_tcp_state_change;
	sk->sk_write_space = iscsi_write_space;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void
1085
iscsi_conn_restore_callbacks(struct iscsi_tcp_conn *tcp_conn)
1086
{
1087
	struct sock *sk = tcp_conn->sock->sk;
1088 1089 1090 1091

	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data    = NULL;
1092 1093 1094
	sk->sk_data_ready   = tcp_conn->old_data_ready;
	sk->sk_state_change = tcp_conn->old_state_change;
	sk->sk_write_space  = tcp_conn->old_write_space;
1095 1096 1097 1098 1099
	sk->sk_no_check	 = 0;
	write_unlock_bh(&sk->sk_callback_lock);
}

/**
1100 1101 1102 1103
 * iscsi_xmit - TCP transmit
 **/
static int
iscsi_xmit(struct iscsi_conn *conn)
1104
{
1105
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1106 1107 1108
	struct iscsi_segment *segment = &tcp_conn->out.segment;
	unsigned int consumed = 0;
	int rc = 0;
1109

1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125
	while (1) {
		rc = iscsi_tcp_xmit_segment(tcp_conn, segment);
		if (rc < 0)
			goto error;
		if (rc == 0)
			break;

		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			if (segment->done != NULL) {
				rc = segment->done(tcp_conn, segment);
				if (rc < 0)
					goto error;
			}
		}
1126 1127
	}

1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138
	debug_tcp("xmit %d bytes\n", consumed);

	conn->txdata_octets += consumed;
	return consumed;

error:
	/* Transmit error. We could initiate error recovery
	 * here. */
	debug_tcp("Error sending PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	return rc;
1139 1140 1141
}

/**
1142 1143
 * iscsi_tcp_xmit_qlen - return the number of bytes queued for xmit
 */
1144
static inline int
1145
iscsi_tcp_xmit_qlen(struct iscsi_conn *conn)
1146
{
1147 1148
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->out.segment;
1149

1150
	return segment->total_copied - segment->total_size;
1151 1152 1153
}

static inline int
1154
iscsi_tcp_flush(struct iscsi_conn *conn)
1155
{
1156 1157 1158 1159 1160
	int rc;

	while (iscsi_tcp_xmit_qlen(conn)) {
		rc = iscsi_xmit(conn);
		if (rc == 0)
1161
			return -EAGAIN;
1162 1163
		if (rc < 0)
			return rc;
1164
	}
1165

1166
	return 0;
1167 1168
}

1169 1170 1171 1172 1173 1174 1175
/*
 * This is called when we're done sending the header.
 * Simply copy the data_segment to the send segment, and return.
 */
static int
iscsi_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn,
			struct iscsi_segment *segment)
1176
{
1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266
	tcp_conn->out.segment = tcp_conn->out.data_segment;
	debug_tcp("Header done. Next segment size %u total_size %u\n",
		  tcp_conn->out.segment.size, tcp_conn->out.segment.total_size);
	return 0;
}

/*
 * Set up the outgoing segment for a PDU header of @hdrlen bytes at @hdr.
 * With header digests enabled the digest is written directly behind the
 * header, so the buffer must have ISCSI_DIGEST_SIZE bytes of spare room.
 */
static void
iscsi_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr, size_t hdrlen)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	debug_tcp("%s(%p%s)\n", __FUNCTION__, tcp_conn,
			conn->hdrdgst_en? ", digest enabled" : "");

	/* Start from an empty data segment; a caller that has payload
	 * fills it in via iscsi_tcp_send_data_prep() */
	memset(&tcp_conn->out.data_segment, 0,
	       sizeof(tcp_conn->out.data_segment));

	/* Header digest: compute the CRC over the header and store it
	 * in the same buffer, right after the header. Both
	 * iscsi_tcp_ctask and mtask are meant to provide the room.
	 */
	if (conn->hdrdgst_en) {
		void *digest = hdr + hdrlen;

		iscsi_tcp_dgst_header(&tcp_conn->tx_hash, hdr, hdrlen, digest);
		hdrlen += ISCSI_DIGEST_SIZE;
	}

	/* Kept for later, when we need to decide whether a payload
	 * goes along with the header. */
	tcp_conn->out.hdr = hdr;

	iscsi_segment_init_linear(&tcp_conn->out.segment, hdr, hdrlen,
				iscsi_tcp_send_hdr_done, NULL);
}

/*
 * Point the outgoing data segment at @len bytes of scatter-gather
 * payload starting @offset bytes into @sg (@count entries).
 * Padding and checksumming are left to the iscsi_segment routines.
 * Returns whatever iscsi_segment_seek_sg() returns.
 */
static int
iscsi_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg,
			 unsigned int count, unsigned int offset,
			 unsigned int len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	/* a hash descriptor is only handed down with data digests on */
	struct hash_desc *tx_hash = conn->datadgst_en ?
					&tcp_conn->tx_hash : NULL;
	unsigned int hdr_spec_len;

	debug_tcp("%s(%p, offset=%d, datalen=%d%s)\n", __FUNCTION__,
			tcp_conn, offset, len,
			conn->datadgst_en? ", digest enabled" : "");

	/* The padded length must agree with the dlength announced in
	 * the header that was prepared for this PDU. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	return iscsi_segment_seek_sg(&tcp_conn->out.data_segment,
				   sg, count, offset, len,
				   NULL, tx_hash);
}

/*
 * Point the outgoing data segment at @len bytes of linear payload at
 * @data. A hash descriptor is passed along only when data digests are
 * enabled on the connection.
 */
static void
iscsi_tcp_send_linear_data_prepare(struct iscsi_conn *conn, void *data,
				   size_t len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

	/* len is a size_t, so it is printed with %zu rather than %d */
	debug_tcp("%s(%p, datalen=%zu%s)\n", __FUNCTION__, tcp_conn, len,
		  conn->datadgst_en? ", digest enabled" : "");

	/* Make sure the datalen matches what the caller
	   said he would send. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
		tx_hash = &tcp_conn->tx_hash;

	iscsi_segment_init_linear(&tcp_conn->out.data_segment,
				data, len, NULL, tx_hash);
}

/**
 * iscsi_solicit_data_cont - initialize next Data-Out
 * @conn: iscsi connection
 * @ctask: scsi command task
 * @r2t: R2T info
 * @left: bytes left to transfer
 *
 * Notes:
 *	Initialize next Data-Out within this R2T sequence and continue
 *	to process next Scatter-Gather element(if any) of this SCSI command.
 *
 *	Called under connection lock.
 **/
1282
static int
1283
iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
1284
			struct iscsi_r2t_info *r2t)
1285 1286
{
	struct iscsi_data *hdr;
1287 1288 1289 1290 1291 1292
	int new_offset, left;

	BUG_ON(r2t->data_length - r2t->sent < 0);
	left = r2t->data_length - r2t->sent;
	if (left == 0)
		return 0;
1293

1294
	hdr = &r2t->dtask.hdr;
1295 1296 1297 1298 1299
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
1300 1301
	memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
	hdr->itt = ctask->hdr->itt;
1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313
	hdr->exp_statsn = r2t->exp_statsn;
	new_offset = r2t->data_offset + r2t->sent;
	hdr->offset = cpu_to_be32(new_offset);
	if (left > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
	} else {
		hton24(hdr->dlength, left);
		r2t->data_count = left;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}

1314 1315
	conn->dataout_pdus_cnt++;
	return 1;
1316 1317 1318
}

/**
1319
 * iscsi_tcp_ctask - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
1320 1321 1322 1323
 * @conn: iscsi connection
 * @ctask: scsi command task
 * @sc: scsi command
 **/
1324 1325
static int
iscsi_tcp_ctask_init(struct iscsi_cmd_task *ctask)
1326
{
1327
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1328 1329 1330
	struct iscsi_conn *conn = ctask->conn;
	struct scsi_cmnd *sc = ctask->sc;
	int err;
1331

1332
	BUG_ON(__kfifo_len(tcp_ctask->r2tqueue));
1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345
	tcp_ctask->sent = 0;
	tcp_ctask->exp_datasn = 0;

	/* Prepare PDU, optionally w/ immediate data */
	debug_scsi("ctask deq [cid %d itt 0x%x imm %d unsol %d]\n",
		    conn->id, ctask->itt, ctask->imm_count,
		    ctask->unsol_count);
	iscsi_tcp_send_hdr_prep(conn, ctask->hdr, ctask->hdr_len);

	if (!ctask->imm_count)
		return 0;

	/* If we have immediate data, attach a payload */
1346 1347
	err = iscsi_tcp_send_data_prep(conn, scsi_out(sc)->table.sgl,
				       scsi_out(sc)->table.nents,
1348 1349 1350 1351 1352 1353
				       0, ctask->imm_count);
	if (err)
		return err;
	tcp_ctask->sent += ctask->imm_count;
	ctask->imm_count = 0;
	return 0;
1354 1355 1356
}

/**
1357
 * iscsi_tcp_mtask_xmit - xmit management(immediate) task
1358 1359 1360 1361 1362 1363 1364 1365 1366
 * @conn: iscsi connection
 * @mtask: task management task
 *
 * Notes:
 *	The function can return -EAGAIN in which case caller must
 *	call it again later, or recover. '0' return code means successful
 *	xmit.
 **/
static int
1367
iscsi_tcp_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
1368
{
1369
	int rc;
1370

1371 1372 1373 1374
	/* Flush any pending data first. */
	rc = iscsi_tcp_flush(conn);
	if (rc < 0)
		return rc;
1375

A
Al Viro 已提交
1376
	if (mtask->hdr->itt == RESERVED_ITT) {
1377 1378 1379
		struct iscsi_session *session = conn->session;

		spin_lock_bh(&session->lock);
1380
		iscsi_free_mgmt_task(conn, mtask);
1381 1382
		spin_unlock_bh(&session->lock);
	}
1383

1384 1385 1386
	return 0;
}

1387 1388 1389 1390 1391 1392 1393 1394 1395
/*
 * iscsi_tcp_ctask_xmit - xmit normal PDU task
 * @conn: iscsi connection
 * @ctask: iscsi command task
 *
 * We're expected to return 0 when everything was transmitted succesfully,
 * -EAGAIN if there's still data in the queue, or != 0 for any other kind
 * of error.
 */
1396
static int
1397
iscsi_tcp_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
1398
{
1399
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1400
	struct scsi_cmnd *sc = ctask->sc;
1401
	struct scsi_data_buffer *sdb = scsi_out(sc);
1402
	int rc = 0;
1403

1404 1405 1406 1407
flush:
	/* Flush any pending data first. */
	rc = iscsi_tcp_flush(conn);
	if (rc < 0)
1408 1409
		return rc;

1410 1411 1412
	/* Are we done already? */
	if (sc->sc_data_direction != DMA_TO_DEVICE)
		return 0;
1413

1414 1415
	if (ctask->unsol_count != 0) {
		struct iscsi_data *hdr = &tcp_ctask->unsol_dtask.hdr;
1416

1417 1418 1419 1420 1421 1422
		/* Prepare a header for the unsolicited PDU.
		 * The amount of data we want to send will be
		 * in ctask->data_count.
		 * FIXME: return the data count instead.
		 */
		iscsi_prep_unsolicit_data_pdu(ctask, hdr);
1423

1424 1425
		debug_tcp("unsol dout [itt 0x%x doff %d dlen %d]\n",
				ctask->itt, tcp_ctask->sent, ctask->data_count);
1426

1427
		iscsi_tcp_send_hdr_prep(conn, hdr, sizeof(*hdr));
1428 1429
		rc = iscsi_tcp_send_data_prep(conn, sdb->table.sgl,
					      sdb->table.nents, tcp_ctask->sent,
1430
					      ctask->data_count);
1431
		if (rc)
1432 1433 1434 1435 1436 1437 1438
			goto fail;
		tcp_ctask->sent += ctask->data_count;
		ctask->unsol_count -= ctask->data_count;
		goto flush;
	} else {
		struct iscsi_session *session = conn->session;
		struct iscsi_r2t_info *r2t;
1439

1440
		/* All unsolicited PDUs sent. Check for solicited PDUs.
1441
		 */
1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452
		spin_lock_bh(&session->lock);
		r2t = tcp_ctask->r2t;
		if (r2t != NULL) {
			/* Continue with this R2T? */
			if (!iscsi_solicit_data_cont(conn, ctask, r2t)) {
				debug_scsi("  done with r2t %p\n", r2t);

				__kfifo_put(tcp_ctask->r2tpool.queue,
					    (void*)&r2t, sizeof(void*));
				tcp_ctask->r2t = r2t = NULL;
			}
1453 1454
		}

1455
		if (r2t == NULL) {
1456 1457
			__kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t,
				    sizeof(void*));
1458
			r2t = tcp_ctask->r2t;
1459
		}
1460
		spin_unlock_bh(&session->lock);
1461

1462 1463 1464 1465
		/* Waiting for more R2Ts to arrive. */
		if (r2t == NULL) {
			debug_tcp("no R2Ts yet\n");
			return 0;
1466 1467
		}

1468 1469 1470
		debug_scsi("sol dout %p [dsn %d itt 0x%x doff %d dlen %d]\n",
			r2t, r2t->solicit_datasn - 1, ctask->itt,
			r2t->data_offset + r2t->sent, r2t->data_count);
1471

1472 1473
		iscsi_tcp_send_hdr_prep(conn, &r2t->dtask.hdr,
					sizeof(struct iscsi_hdr));
1474

1475 1476
		rc = iscsi_tcp_send_data_prep(conn, sdb->table.sgl,
					      sdb->table.nents,
1477 1478
					      r2t->data_offset + r2t->sent,
					      r2t->data_count);
1479
		if (rc)
1480 1481 1482 1483
			goto fail;
		tcp_ctask->sent += r2t->data_count;
		r2t->sent += r2t->data_count;
		goto flush;
1484 1485
	}
	return 0;
1486 1487 1488
fail:
	iscsi_conn_failure(conn, rc);
	return -EIO;
1489 1490
}

1491 1492
/*
 * Create a connection with room for our iscsi_tcp_conn private data
 * and allocate the crc32c transforms used for tx and rx digests.
 * Returns the new class connection, or NULL on failure.
 */
static struct iscsi_cls_conn *
iscsi_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
{
	struct iscsi_cls_conn *cls_conn;
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_conn *conn;

	cls_conn = iscsi_conn_setup(cls_session, sizeof(*tcp_conn), conn_idx);
	if (cls_conn == NULL)
		return NULL;

	conn = cls_conn->dd_data;
	/*
	 * due to strange issues with iser these are not set
	 * in iscsi_conn_setup
	 */
	conn->max_recv_dlength = ISCSI_DEF_MAX_RECV_SEG_LEN;

	tcp_conn = conn->dd_data;
	tcp_conn->iscsi_conn = conn;

	tcp_conn->tx_hash.flags = 0;
	tcp_conn->tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						  CRYPTO_ALG_ASYNC);
	if (IS_ERR(tcp_conn->tx_hash.tfm))
		goto no_crc32c;

	tcp_conn->rx_hash.flags = 0;
	tcp_conn->rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						  CRYPTO_ALG_ASYNC);
	if (!IS_ERR(tcp_conn->rx_hash.tfm))
		return cls_conn;

	/* rx transform failed: give the tx one back before bailing out */
	crypto_free_hash(tcp_conn->tx_hash.tfm);
no_crc32c:
	iscsi_conn_printk(KERN_ERR, conn,
			  "Could not create connection due to crc32c "
			  "loading error. Make sure the crc32c "
			  "module is built as a module or into the "
			  "kernel\n");
	iscsi_conn_teardown(cls_conn);
	return NULL;
}

1537 1538 1539
static void
iscsi_tcp_release_conn(struct iscsi_conn *conn)
{
1540
	struct iscsi_session *session = conn->session;
1541
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1542
	struct socket *sock = tcp_conn->sock;
1543

1544
	if (!sock)
1545 1546
		return;

1547
	sock_hold(sock->sk);
1548
	iscsi_conn_restore_callbacks(tcp_conn);
1549
	sock_put(sock->sk);
1550

1551
	spin_lock_bh(&session->lock);
1552 1553
	tcp_conn->sock = NULL;
	conn->recv_lock = NULL;
1554 1555
	spin_unlock_bh(&session->lock);
	sockfd_put(sock);
1556 1557
}

1558
static void
1559
iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
1560
{
1561 1562
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1563

1564
	iscsi_tcp_release_conn(conn);
1565

P
Pete Wyckoff 已提交
1566 1567 1568 1569
	if (tcp_conn->tx_hash.tfm)
		crypto_free_hash(tcp_conn->tx_hash.tfm);
	if (tcp_conn->rx_hash.tfm)
		crypto_free_hash(tcp_conn->rx_hash.tfm);
1570

1571
	iscsi_conn_teardown(cls_conn);
1572
}
1573

1574 1575 1576 1577 1578 1579 1580 1581 1582
/*
 * Stop the connection via iscsi_conn_stop(), then detach it from
 * its socket.
 */
static void
iscsi_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
	/* read before stopping, exactly as the release step needs it */
	struct iscsi_conn *iconn = cls_conn->dd_data;

	iscsi_conn_stop(cls_conn, flag);
	iscsi_tcp_release_conn(iconn);
}

1583 1584 1585 1586 1587 1588 1589 1590 1591 1592
static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
			      char *buf, int *port,
			      int (*getname)(struct socket *, struct sockaddr *,
					int *addrlen))
{
	struct sockaddr_storage *addr;
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	int rc = 0, len;

1593
	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622
	if (!addr)
		return -ENOMEM;

	if (getname(sock, (struct sockaddr *) addr, &len)) {
		rc = -ENODEV;
		goto free_addr;
	}

	switch (addr->ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
		*port = be16_to_cpu(sin->sin_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, NIP6_FMT, NIP6(sin6->sin6_addr));
		*port = be16_to_cpu(sin6->sin6_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	}
free_addr:
	kfree(addr);
	return rc;
}

1623 1624
/*
 * Bind an iSCSI connection to an already existing socket.
 * @transport_eph is cast to int and looked up as a socket file
 * descriptor. On success the socket callbacks are intercepted and the
 * receive state machine is reset. Returns 0 on success, -EEXIST if the
 * descriptor does not resolve to a socket, or the error of the step
 * that failed (the socket reference is dropped in that case).
 */
static int
iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
		    struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
		    int is_leading)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
	struct iscsi_host *ihost = shost_priv(shost);
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct socket *sock;
	struct sock *sk;
	int rc;

	/* lookup for existing socket */
	sock = sockfd_lookup((int)transport_eph, &rc);
	if (sock == NULL) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "sockfd_lookup failed %d\n", rc);
		return -EEXIST;
	}

	/*
	 * copy these values now because if we drop the session
	 * userspace may still want to query the values since we will
	 * be using them for the reconnect
	 */
	rc = iscsi_tcp_get_addr(conn, sock, conn->portal_address,
				&conn->portal_port, kernel_getpeername);
	if (rc)
		goto put_sock;

	rc = iscsi_tcp_get_addr(conn, sock, ihost->local_address,
				&ihost->local_port, kernel_getsockname);
	if (rc)
		goto put_sock;

	rc = iscsi_conn_bind(cls_session, cls_conn, is_leading);
	if (rc)
		goto put_sock;

	/* bind iSCSI connection and socket */
	tcp_conn->sock = sock;

	/* setup Socket parameters */
	sk = sock->sk;
	sk->sk_reuse = 1;
	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
	sk->sk_allocation = GFP_ATOMIC;

	/* FIXME: disable Nagle's algorithm */

	/*
	 * Intercept TCP callbacks for sendfile like receive
	 * processing.
	 */
	conn->recv_lock = &sk->sk_callback_lock;
	iscsi_conn_set_callbacks(conn);
	tcp_conn->sendpage = sock->ops->sendpage;

	/* set receive state machine into initial state */
	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;

put_sock:
	sockfd_put(sock);
	return rc;
}

1691
/* called with host lock */
M
Mike Christie 已提交
1692
static void
1693
iscsi_tcp_mtask_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
1694
{
1695 1696 1697 1698 1699 1700 1701 1702 1703
	debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id, mtask->itt);

	/* Prepare PDU, optionally w/ immediate data */
	iscsi_tcp_send_hdr_prep(conn, mtask->hdr, sizeof(*mtask->hdr));

	/* If we have immediate data, attach a payload */
	if (mtask->data_count)
		iscsi_tcp_send_linear_data_prepare(conn, mtask->data,
						   mtask->data_count);
1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716
}

static int
iscsi_r2tpool_alloc(struct iscsi_session *session)
{
	int i;
	int cmd_i;

	/*
	 * initialize per-task: R2T pool and xmit queue
	 */
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
	        struct iscsi_cmd_task *ctask = session->cmds[cmd_i];
1717
		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1718 1719 1720 1721 1722 1723 1724 1725

		/*
		 * pre-allocated x4 as much r2ts to handle race when
		 * target acks DataOut faster than we data_xmit() queues
		 * could replenish r2tqueue.
		 */

		/* R2T pool */
1726
		if (iscsi_pool_init(&tcp_ctask->r2tpool, session->max_r2t * 4, NULL,
1727
				    sizeof(struct iscsi_r2t_info))) {
1728 1729 1730 1731
			goto r2t_alloc_fail;
		}

		/* R2T xmit queue */
1732
		tcp_ctask->r2tqueue = kfifo_alloc(
1733
		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL);
1734
		if (tcp_ctask->r2tqueue == ERR_PTR(-ENOMEM)) {
1735
			iscsi_pool_free(&tcp_ctask->r2tpool);
1736 1737 1738 1739 1740 1741 1742 1743
			goto r2t_alloc_fail;
		}
	}

	return 0;

r2t_alloc_fail:
	for (i = 0; i < cmd_i; i++) {
1744 1745 1746 1747
		struct iscsi_cmd_task *ctask = session->cmds[i];
		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;

		kfifo_free(tcp_ctask->r2tqueue);
1748
		iscsi_pool_free(&tcp_ctask->r2tpool);
1749 1750 1751 1752 1753 1754 1755 1756 1757 1758
	}
	return -ENOMEM;
}

static void
iscsi_r2tpool_free(struct iscsi_session *session)
{
	int i;

	for (i = 0; i < session->cmds_max; i++) {
1759 1760
		struct iscsi_cmd_task *ctask = session->cmds[i];
		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1761

1762
		kfifo_free(tcp_ctask->r2tqueue);
1763
		iscsi_pool_free(&tcp_ctask->r2tpool);
1764 1765 1766 1767
	}
}

static int
1768
iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
1769
		     char *buf, int buflen)
1770
{
1771
	struct iscsi_conn *conn = cls_conn->dd_data;
1772
	struct iscsi_session *session = conn->session;
1773
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1774
	int value;
1775 1776 1777

	switch(param) {
	case ISCSI_PARAM_HDRDGST_EN:
1778
		iscsi_set_param(cls_conn, param, buf, buflen);
1779 1780
		break;
	case ISCSI_PARAM_DATADGST_EN:
1781
		iscsi_set_param(cls_conn, param, buf, buflen);
1782 1783
		tcp_conn->sendpage = conn->datadgst_en ?
			sock_no_sendpage : tcp_conn->sock->ops->sendpage;
1784 1785
		break;
	case ISCSI_PARAM_MAX_R2T:
1786
		sscanf(buf, "%d", &value);
1787 1788 1789
		if (value <= 0 || !is_power_of_2(value))
			return -EINVAL;
		if (session->max_r2t == value)
1790 1791
			break;
		iscsi_r2tpool_free(session);
1792
		iscsi_set_param(cls_conn, param, buf, buflen);
1793 1794 1795 1796
		if (iscsi_r2tpool_alloc(session))
			return -ENOMEM;
		break;
	default:
1797
		return iscsi_set_param(cls_conn, param, buf, buflen);
1798 1799 1800 1801 1802 1803
	}

	return 0;
}

static int
1804 1805
iscsi_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
			 enum iscsi_param param, char *buf)
1806
{
1807
	struct iscsi_conn *conn = cls_conn->dd_data;
1808
	int len;
1809 1810

	switch(param) {
1811
	case ISCSI_PARAM_CONN_PORT:
1812 1813 1814
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%hu\n", conn->portal_port);
		spin_unlock_bh(&conn->session->lock);
1815
		break;
1816
	case ISCSI_PARAM_CONN_ADDRESS:
1817 1818 1819
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%s\n", conn->portal_address);
		spin_unlock_bh(&conn->session->lock);
1820 1821
		break;
	default:
1822
		return iscsi_conn_get_param(cls_conn, param, buf);
1823 1824 1825 1826 1827
	}

	return len;
}

1828
static void
1829
iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
1830
{
1831
	struct iscsi_conn *conn = cls_conn->dd_data;
1832
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844

	stats->txdata_octets = conn->txdata_octets;
	stats->rxdata_octets = conn->rxdata_octets;
	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
	stats->dataout_pdus = conn->dataout_pdus_cnt;
	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
	stats->datain_pdus = conn->datain_pdus_cnt;
	stats->r2t_pdus = conn->r2t_pdus_cnt;
	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
	stats->custom_length = 3;
	strcpy(stats->custom[0].desc, "tx_sendpage_failures");
1845
	stats->custom[0].value = tcp_conn->sendpage_failures_cnt;
1846
	strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
1847
	stats->custom[1].value = tcp_conn->discontiguous_hdr_cnt;
1848 1849 1850 1851
	strcpy(stats->custom[2].desc, "eh_abort_cnt");
	stats->custom[2].value = conn->eh_abort_cnt;
}

1852
static struct iscsi_cls_session *
1853
iscsi_tcp_session_create(struct Scsi_Host *shost, uint16_t cmds_max,
1854 1855
			 uint16_t qdepth, uint32_t initial_cmdsn,
			 uint32_t *hostno)
1856
{
1857 1858 1859
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
	int cmd_i;
1860

1861 1862 1863 1864 1865 1866
	if (shost) {
		printk(KERN_ERR "iscsi_tcp: invalid shost %d.\n",
		       shost->host_no);
		return NULL;
	}

1867
	shost = iscsi_host_alloc(&iscsi_sht, 0, qdepth);
1868
	if (!shost)
1869
		return NULL;
1870 1871 1872 1873 1874
	shost->transportt = iscsi_tcp_scsi_transport;
	shost->max_lun = iscsi_max_lun;
	shost->max_id = 0;
	shost->max_channel = 0;
	shost->max_cmd_len = 16;
1875
	shost->can_queue = cmds_max;
1876

1877
	if (iscsi_host_add(shost, NULL))
1878 1879 1880 1881 1882 1883 1884 1885 1886 1887
		goto free_host;
	*hostno = shost->host_no;

	cls_session = iscsi_session_setup(&iscsi_tcp_transport, shost, cmds_max,
					  sizeof(struct iscsi_tcp_cmd_task),
					  sizeof(struct iscsi_tcp_mgmt_task),
					  initial_cmdsn);
	if (!cls_session)
		goto remove_host;
	session = cls_session->dd_data;
1888

1889
	shost->can_queue = session->cmds_max;
1890 1891 1892 1893
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
		struct iscsi_cmd_task *ctask = session->cmds[cmd_i];
		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;

1894 1895
		ctask->hdr = &tcp_ctask->hdr.cmd_hdr;
		ctask->hdr_max = sizeof(tcp_ctask->hdr) - ISCSI_DIGEST_SIZE;
1896 1897 1898 1899 1900 1901
	}

	for (cmd_i = 0; cmd_i < session->mgmtpool_max; cmd_i++) {
		struct iscsi_mgmt_task *mtask = session->mgmt_cmds[cmd_i];
		struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data;

1902
		mtask->hdr = (struct iscsi_hdr *) &tcp_mtask->hdr;
1903 1904
	}

1905 1906
	if (iscsi_r2tpool_alloc(session))
		goto remove_session;
1907 1908
	return cls_session;

1909
remove_session:
1910
	iscsi_session_teardown(cls_session);
1911
remove_host:
1912
	iscsi_host_remove(shost);
1913
free_host:
1914
	iscsi_host_free(shost);
1915 1916 1917 1918 1919
	return NULL;
}

/*
 * Destroy a session created by iscsi_tcp_session_create(): free the
 * per-task R2T pools, then remove and free the session's SCSI host.
 */
static void iscsi_tcp_session_destroy(struct iscsi_cls_session *cls_session)
{
	struct Scsi_Host *host = iscsi_session_to_shost(cls_session);

	iscsi_r2tpool_free(cls_session->dd_data);

	iscsi_host_remove(host);
	iscsi_host_free(host);
}

1928 1929
static int iscsi_tcp_slave_configure(struct scsi_device *sdev)
{
1930
	blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_ANY);
1931 1932 1933 1934
	blk_queue_dma_alignment(sdev->request_queue, 0);
	return 0;
}

1935
static struct scsi_host_template iscsi_sht = {
1936
	.module			= THIS_MODULE,
1937
	.name			= "iSCSI Initiator over TCP/IP",
1938 1939
	.queuecommand           = iscsi_queuecommand,
	.change_queue_depth	= iscsi_change_queue_depth,
1940
	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
1941
	.sg_tablesize		= 4096,
1942
	.max_sectors		= 0xFFFF,
1943 1944
	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
	.eh_abort_handler       = iscsi_eh_abort,
1945
	.eh_device_reset_handler= iscsi_eh_device_reset,
1946 1947
	.eh_host_reset_handler	= iscsi_eh_host_reset,
	.use_clustering         = DISABLE_CLUSTERING,
1948
	.slave_configure        = iscsi_tcp_slave_configure,
1949 1950 1951 1952
	.proc_name		= "iscsi_tcp",
	.this_id		= -1,
};

1953 1954 1955 1956 1957
static struct iscsi_transport iscsi_tcp_transport = {
	.owner			= THIS_MODULE,
	.name			= "tcp",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
				  | CAP_DATADGST,
1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970
	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
				  ISCSI_MAX_XMIT_DLENGTH |
				  ISCSI_HDRDGST_EN |
				  ISCSI_DATADGST_EN |
				  ISCSI_INITIAL_R2T_EN |
				  ISCSI_MAX_R2T |
				  ISCSI_IMM_DATA_EN |
				  ISCSI_FIRST_BURST |
				  ISCSI_MAX_BURST |
				  ISCSI_PDU_INORDER_EN |
				  ISCSI_DATASEQ_INORDER_EN |
				  ISCSI_ERL |
				  ISCSI_CONN_PORT |
1971
				  ISCSI_CONN_ADDRESS |
1972 1973 1974
				  ISCSI_EXP_STATSN |
				  ISCSI_PERSISTENT_PORT |
				  ISCSI_PERSISTENT_ADDRESS |
1975 1976
				  ISCSI_TARGET_NAME | ISCSI_TPGT |
				  ISCSI_USERNAME | ISCSI_PASSWORD |
1977
				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
1978 1979 1980
				  ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
				  ISCSI_LU_RESET_TMO |
				  ISCSI_PING_TMO | ISCSI_RECV_TMO,
1981
	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
1982 1983
				  ISCSI_HOST_INITIATOR_NAME |
				  ISCSI_HOST_NETDEV_NAME,
1984 1985 1986 1987 1988 1989 1990
	/* session management */
	.create_session		= iscsi_tcp_session_create,
	.destroy_session	= iscsi_tcp_session_destroy,
	/* connection management */
	.create_conn		= iscsi_tcp_conn_create,
	.bind_conn		= iscsi_tcp_conn_bind,
	.destroy_conn		= iscsi_tcp_conn_destroy,
1991
	.set_param		= iscsi_conn_set_param,
1992
	.get_conn_param		= iscsi_tcp_conn_get_param,
1993
	.get_session_param	= iscsi_session_get_param,
1994
	.start_conn		= iscsi_conn_start,
1995
	.stop_conn		= iscsi_tcp_conn_stop,
1996
	/* iscsi host params */
1997
	.get_host_param		= iscsi_host_get_param,
1998
	.set_host_param		= iscsi_host_set_param,
1999
	/* IO */
2000 2001
	.send_pdu		= iscsi_conn_send_pdu,
	.get_stats		= iscsi_conn_get_stats,
2002 2003
	.init_cmd_task		= iscsi_tcp_ctask_init,
	.init_mgmt_task		= iscsi_tcp_mtask_init,
2004 2005 2006 2007
	.xmit_cmd_task		= iscsi_tcp_ctask_xmit,
	.xmit_mgmt_task		= iscsi_tcp_mtask_xmit,
	.cleanup_cmd_task	= iscsi_tcp_cleanup_ctask,
	/* recovery */
M
Mike Christie 已提交
2008
	.session_recovery_timedout = iscsi_session_recovery_timedout,
2009 2010 2011 2012 2013 2014
};

/**
 * iscsi_tcp_init - module entry point
 *
 * Rejects an iscsi_max_lun value below 1, then registers
 * iscsi_tcp_transport and stores the returned handle in
 * iscsi_tcp_scsi_transport.
 *
 * Returns 0 on success, -EINVAL when iscsi_max_lun is below 1, or
 * -ENODEV when iscsi_register_transport() returns NULL.
 */
static int __init
iscsi_tcp_init(void)
{
	if (iscsi_max_lun < 1) {
		printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
		       iscsi_max_lun);
		return -EINVAL;
	}

	iscsi_tcp_scsi_transport =
		iscsi_register_transport(&iscsi_tcp_transport);

	return iscsi_tcp_scsi_transport ? 0 : -ENODEV;
}

/**
 * iscsi_tcp_exit - module exit point
 *
 * Unregisters iscsi_tcp_transport, the transport that iscsi_tcp_init()
 * registered.
 */
static void __exit iscsi_tcp_exit(void)
{
	iscsi_unregister_transport(&iscsi_tcp_transport);
}

/* Register iscsi_tcp_init()/iscsi_tcp_exit() as the module's init and exit hooks. */
module_init(iscsi_tcp_init);
module_exit(iscsi_tcp_exit);