/*
 * iSCSI Initiator over TCP/IP Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
 * Copyright (C) 2005 - 2006 Mike Christie
 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
 * maintained by open-iscsi@googlegroups.com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * See the file COPYING included with this distribution for more details.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 */

#include <linux/types.h>
#include <linux/list.h>
#include <linux/inet.h>
#include <linux/file.h>
#include <linux/blkdev.h>
#include <linux/crypto.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
#include <net/tcp.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include "iscsi_tcp.h"

MODULE_AUTHOR("Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");
MODULE_DESCRIPTION("iSCSI/TCP data-path");
MODULE_LICENSE("GPL");
#undef DEBUG_TCP
#define DEBUG_ASSERT

#ifdef DEBUG_TCP
#define debug_tcp(fmt...) printk(KERN_INFO "tcp: " fmt)
#else
#define debug_tcp(fmt...)
#endif

#ifndef DEBUG_ASSERT
#ifdef BUG_ON
#undef BUG_ON
#endif
#define BUG_ON(expr)
#endif

static struct scsi_transport_template *iscsi_tcp_scsi_transport;
static struct scsi_host_template iscsi_sht;
static struct iscsi_transport iscsi_tcp_transport;

static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

static int iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
				   struct iscsi_segment *segment);

/*
 * Scatterlist handling: inside the iscsi_segment, we
 * remember an index into the scatterlist, and set data/size
 * to the current scatterlist entry. For highmem pages, we
 * kmap as needed.
 *
 * Note that the page is unmapped when we return from
 * TCP's data_ready handler, so we may end up mapping and
 * unmapping the same page repeatedly. The whole reason
 * for this is that we shouldn't keep the page mapped
 * outside the softirq.
 */

/**
 * iscsi_tcp_segment_init_sg - init indicated scatterlist entry
 * @segment: the buffer object
 * @sg: scatterlist
 * @offset: byte offset into that sg entry
 *
 * This function sets up the segment so that subsequent
 * data is copied to the indicated sg entry, at the given
 * offset.
 */
static inline void
iscsi_tcp_segment_init_sg(struct iscsi_segment *segment,
			  struct scatterlist *sg, unsigned int offset)
{
	segment->sg = sg;
	segment->sg_offset = offset;
	segment->size = min(sg->length - offset,
			    segment->total_size - segment->total_copied);
	segment->data = NULL;
}

/**
 * iscsi_tcp_segment_map - map the current S/G page
 * @segment: iscsi_segment
 * @recv: 1 if called from recv path
 *
 * We only need to possibly kmap data if scatter lists are being used,
 * because the iscsi passthrough and internal IO paths will never use high
 * mem pages.
 */
static inline void
iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv)
{
	struct scatterlist *sg;

	if (segment->data != NULL || !segment->sg)
		return;

	sg = segment->sg;
	BUG_ON(segment->sg_mapped);
	BUG_ON(sg->length == 0);

	/*
	 * If the page count is greater than one it is ok to send
	 * to the network layer's zero copy send path. If not we
	 * have to go the slow sendmsg path. We always map for the
	 * recv path.
	 */
	if (page_count(sg_page(sg)) >= 1 && !recv)
		return;

	debug_tcp("iscsi_tcp_segment_map %s %p\n", recv ? "recv" : "xmit",
		  segment);
	segment->sg_mapped = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
	segment->data = segment->sg_mapped + sg->offset + segment->sg_offset;
}

static inline void
iscsi_tcp_segment_unmap(struct iscsi_segment *segment)
{
	debug_tcp("iscsi_tcp_segment_unmap %p\n", segment);

	if (segment->sg_mapped) {
		debug_tcp("iscsi_tcp_segment_unmap valid\n");
		kunmap_atomic(segment->sg_mapped, KM_SOFTIRQ0);
		segment->sg_mapped = NULL;
		segment->data = NULL;
	}
}

/*
 * Splice the digest buffer into the buffer
 */
static inline void
iscsi_tcp_segment_splice_digest(struct iscsi_segment *segment, void *digest)
{
	segment->data = digest;
	segment->digest_len = ISCSI_DIGEST_SIZE;
	segment->total_size += ISCSI_DIGEST_SIZE;
	segment->size = ISCSI_DIGEST_SIZE;
	segment->copied = 0;
	segment->sg = NULL;
	segment->hash = NULL;
}

/**
 * iscsi_tcp_segment_done - check whether the segment is complete
 * @segment: iscsi segment to check
 * @recv: set to one if this is called from the recv path
 * @copied: number of bytes copied
 *
 * Check if we're done receiving this segment. If the receive
 * buffer is full but we expect more data, move on to the
 * next entry in the scatterlist.
 *
 * If the amount of data we received isn't a multiple of 4,
 * we will transparently receive the pad bytes, too.
 *
 * This function must be re-entrant.
 */
static inline int
iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv, unsigned copied)
{
	static unsigned char padbuf[ISCSI_PAD_LEN];
	struct scatterlist sg;
	unsigned int pad;

	debug_tcp("copied %u %u size %u %s\n", segment->copied, copied,
		  segment->size, recv ? "recv" : "xmit");
	if (segment->hash && copied) {
		/*
		 * If a segment is kmapped we must unmap it before sending
		 * to the crypto layer since that will try to kmap it again.
		 */
		iscsi_tcp_segment_unmap(segment);

		if (!segment->data) {
			sg_init_table(&sg, 1);
			sg_set_page(&sg, sg_page(segment->sg), copied,
				    segment->copied + segment->sg_offset +
							segment->sg->offset);
		} else
			sg_init_one(&sg, segment->data + segment->copied,
				    copied);
		crypto_hash_update(segment->hash, &sg, copied);
	}

	segment->copied += copied;
	if (segment->copied < segment->size) {
		iscsi_tcp_segment_map(segment, recv);
		return 0;
	}

	segment->total_copied += segment->copied;
	segment->copied = 0;
	segment->size = 0;

	/* Unmap the current scatterlist page, if there is one. */
	iscsi_tcp_segment_unmap(segment);

	/* Do we have more scatterlist entries? */
	debug_tcp("total copied %u total size %u\n", segment->total_copied,
		   segment->total_size);
	if (segment->total_copied < segment->total_size) {
		/* Proceed to the next entry in the scatterlist. */
		iscsi_tcp_segment_init_sg(segment, sg_next(segment->sg),
					  0);
		iscsi_tcp_segment_map(segment, recv);
		BUG_ON(segment->size == 0);
		return 0;
	}

	/* Do we need to handle padding? */
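	/* iSCSI segments are padded out to a 4-byte boundary, so e.g. a
	 * 13-byte data segment is followed by 3 pad bytes on the wire. */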
	pad = iscsi_padding(segment->total_copied);
	if (pad != 0) {
		debug_tcp("consume %d pad bytes\n", pad);
		segment->total_size += pad;
		segment->size = pad;
		segment->data = padbuf;
		return 0;
	}

	/*
	 * Set us up for transferring the data digest. hdr digest
	 * is completely handled in hdr done function.
	 */
	if (segment->hash) {
		crypto_hash_final(segment->hash, segment->digest);
		iscsi_tcp_segment_splice_digest(segment,
				 recv ? segment->recv_digest : segment->digest);
		return 0;
	}

	return 1;
}

/**
 * iscsi_tcp_xmit_segment - transmit segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to transmit
 *
 * This function transmits as much of the buffer as
 * the network layer will accept, and returns the number of
 * bytes transmitted.
 *
 * If CRC hashing is enabled, the function will compute the
 * hash as it goes. When the entire segment has been transmitted,
 * it will retrieve the hash value and send it as well.
 */
static int
iscsi_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment)
{
	struct socket *sk = tcp_conn->sock;
	unsigned int copied = 0;
	int r = 0;

	while (!iscsi_tcp_segment_done(segment, 0, r)) {
		struct scatterlist *sg;
		unsigned int offset, copy;
		int flags = 0;

		r = 0;
		offset = segment->copied;
		copy = segment->size - offset;

		if (segment->total_copied + segment->size < segment->total_size)
			flags |= MSG_MORE;

		/* Use sendpage if we can; else fall back to sendmsg */
		if (!segment->data) {
			sg = segment->sg;
			offset += segment->sg_offset + sg->offset;
			r = tcp_conn->sendpage(sk, sg_page(sg), offset, copy,
					       flags);
		} else {
			struct msghdr msg = { .msg_flags = flags };
			struct kvec iov = {
				.iov_base = segment->data + offset,
				.iov_len = copy
			};

			r = kernel_sendmsg(sk, &msg, &iov, 1, copy);
		}

		if (r < 0) {
			iscsi_tcp_segment_unmap(segment);
			if (copied || r == -EAGAIN)
				break;
			return r;
		}
		copied += r;
	}
	return copied;
}

/**
 * iscsi_tcp_segment_recv - copy data to segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to copy to
 * @ptr: data pointer
 * @len: amount of data available
 *
 * This function copies up to @len bytes to the
 * given buffer, and returns the number of bytes
 * consumed, which can actually be less than @len.
 *
 * If hash digest is enabled, the function will update the
 * hash while copying.
 * Combining these two operations doesn't buy us a lot (yet),
 * but in the future we could implement combined copy+crc,
 * just the way we do for network layer checksums.
 */
static int
iscsi_tcp_segment_recv(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment, const void *ptr,
		       unsigned int len)
{
	unsigned int copy = 0, copied = 0;

	while (!iscsi_tcp_segment_done(segment, 1, copy)) {
		if (copied == len) {
			debug_tcp("iscsi_tcp_segment_recv copied %d bytes\n",
				  len);
			break;
		}

		copy = min(len - copied, segment->size - segment->copied);
		debug_tcp("iscsi_tcp_segment_recv copying %d\n", copy);
		memcpy(segment->data + segment->copied, ptr + copied, copy);
		copied += copy;
	}
	return copied;
}

static inline void
iscsi_tcp_dgst_header(struct hash_desc *hash, const void *hdr, size_t hdrlen,
		      unsigned char digest[ISCSI_DIGEST_SIZE])
{
	struct scatterlist sg;

	sg_init_one(&sg, hdr, hdrlen);
	crypto_hash_digest(hash, &sg, hdrlen, digest);
}

static inline int
iscsi_tcp_dgst_verify(struct iscsi_tcp_conn *tcp_conn,
		      struct iscsi_segment *segment)
{
	if (!segment->digest_len)
		return 1;

	if (memcmp(segment->recv_digest, segment->digest,
		   segment->digest_len)) {
		debug_scsi("digest mismatch\n");
		return 0;
	}

	return 1;
}

/*
 * Helper function to set up segment buffer
 */
static inline void
__iscsi_segment_init(struct iscsi_segment *segment, size_t size,
		     iscsi_segment_done_fn_t *done, struct hash_desc *hash)
{
	memset(segment, 0, sizeof(*segment));
	segment->total_size = size;
	segment->done = done;

	if (hash) {
		segment->hash = hash;
		crypto_hash_init(hash);
	}
}

static inline void
iscsi_segment_init_linear(struct iscsi_segment *segment, void *data,
			  size_t size, iscsi_segment_done_fn_t *done,
			  struct hash_desc *hash)
{
	__iscsi_segment_init(segment, size, done, hash);
	segment->data = data;
	segment->size = size;
}

static inline int
iscsi_segment_seek_sg(struct iscsi_segment *segment,
		      struct scatterlist *sg_list, unsigned int sg_count,
		      unsigned int offset, size_t size,
		      iscsi_segment_done_fn_t *done, struct hash_desc *hash)
{
	struct scatterlist *sg;
	unsigned int i;

	debug_scsi("iscsi_segment_seek_sg offset %u size %llu\n",
		  offset, size);
	__iscsi_segment_init(segment, size, done, hash);
	for_each_sg(sg_list, sg, sg_count, i) {
		debug_scsi("sg %d, len %u offset %u\n", i, sg->length,
			   sg->offset);
		if (offset < sg->length) {
			iscsi_tcp_segment_init_sg(segment, sg, offset);
			return 0;
		}
		offset -= sg->length;
	}

	return ISCSI_ERR_DATA_OFFSET;
}

/**
 * iscsi_tcp_hdr_recv_prep - prep segment for hdr reception
 * @tcp_conn: iscsi connection to prep for
 *
 * This function always passes NULL for the hash argument, because when this
 * function is called we do not yet know the final size of the header and want
 * to delay the digest processing until we know that.
 */
static void
iscsi_tcp_hdr_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	debug_tcp("iscsi_tcp_hdr_recv_prep(%p%s)\n", tcp_conn,
		  tcp_conn->iscsi_conn->hdrdgst_en ? ", digest enabled" : "");
	iscsi_segment_init_linear(&tcp_conn->in.segment,
				tcp_conn->in.hdr_buf, sizeof(struct iscsi_hdr),
				iscsi_tcp_hdr_recv_done, NULL);
}

/*
 * Handle incoming reply to any other type of command
 */
static int
iscsi_tcp_data_recv_done(struct iscsi_tcp_conn *tcp_conn,
			 struct iscsi_segment *segment)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	int rc = 0;

	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
		return ISCSI_ERR_DATA_DGST;

	rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr,
			conn->data, tcp_conn->in.datalen);
	if (rc)
		return rc;

	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

static void
iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct hash_desc *rx_hash = NULL;

	if (conn->datadgst_en)
		rx_hash = &tcp_conn->rx_hash;

	iscsi_segment_init_linear(&tcp_conn->in.segment,
				conn->data, tcp_conn->in.datalen,
				iscsi_tcp_data_recv_done, rx_hash);
}

/*
 * must be called with session lock
 */
static void
iscsi_tcp_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
{
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	struct iscsi_r2t_info *r2t;

	/* flush ctask's r2t queues */
	while (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*))) {
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		debug_scsi("iscsi_tcp_cleanup_ctask pending r2t dropped\n");
	}

	r2t = tcp_ctask->r2t;
	if (r2t != NULL) {
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		tcp_ctask->r2t = NULL;
	}
}

/**
 * iscsi_data_rsp - SCSI Data-In Response processing
 * @conn: iscsi connection
 * @ctask: scsi command task
 **/
static int
iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
	struct iscsi_session *session = conn->session;
	struct scsi_cmnd *sc = ctask->sc;
	int datasn = be32_to_cpu(rhdr->datasn);
	unsigned total_in_length = scsi_in(sc)->length;

	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
	if (tcp_conn->in.datalen == 0)
		return 0;

	if (tcp_ctask->exp_datasn != datasn) {
		debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->datasn(%d)\n",
		          __FUNCTION__, tcp_ctask->exp_datasn, datasn);
		return ISCSI_ERR_DATASN;
	}

	tcp_ctask->exp_datasn++;

	tcp_ctask->data_offset = be32_to_cpu(rhdr->offset);
	if (tcp_ctask->data_offset + tcp_conn->in.datalen > total_in_length) {
		debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n",
		          __FUNCTION__, tcp_ctask->data_offset,
		          tcp_conn->in.datalen, total_in_length);
		return ISCSI_ERR_DATA_OFFSET;
	}

	if (rhdr->flags & ISCSI_FLAG_DATA_STATUS) {
		sc->result = (DID_OK << 16) | rhdr->cmd_status;
		conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
		if (rhdr->flags & (ISCSI_FLAG_DATA_UNDERFLOW |
		                   ISCSI_FLAG_DATA_OVERFLOW)) {
			int res_count = be32_to_cpu(rhdr->residual_count);

			if (res_count > 0 &&
			    (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW ||
			     res_count <= total_in_length))
				scsi_in(sc)->resid = res_count;
			else
				sc->result = (DID_BAD_TARGET << 16) |
					rhdr->cmd_status;
		}
	}

	conn->datain_pdus_cnt++;
	return 0;
}

/**
 * iscsi_solicit_data_init - initialize first Data-Out
 * @conn: iscsi connection
 * @ctask: scsi command task
 * @r2t: R2T info
 *
 * Notes:
 *	Initialize first Data-Out within this R2T sequence and finds
 *	proper data_offset within this SCSI command.
 *
 *	This function is called with connection lock taken.
 **/
static void
iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
			struct iscsi_r2t_info *r2t)
{
	struct iscsi_data *hdr;

	hdr = &r2t->dtask.hdr;
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
	memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
	hdr->itt = ctask->hdr->itt;
	hdr->exp_statsn = r2t->exp_statsn;
	hdr->offset = cpu_to_be32(r2t->data_offset);
	if (r2t->data_length > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
		hdr->flags = 0;
	} else {
		hton24(hdr->dlength, r2t->data_length);
		r2t->data_count = r2t->data_length;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}
	conn->dataout_pdus_cnt++;

	r2t->sent = 0;
}

/**
 * iscsi_r2t_rsp - iSCSI R2T Response processing
 * @conn: iscsi connection
 * @ctask: scsi command task
 **/
static int
iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
{
	struct iscsi_r2t_info *r2t;
	struct iscsi_session *session = conn->session;
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
	int r2tsn = be32_to_cpu(rhdr->r2tsn);
	int rc;

	if (tcp_conn->in.datalen) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with datalen %d\n",
				  tcp_conn->in.datalen);
		return ISCSI_ERR_DATALEN;
	}

	if (tcp_ctask->exp_datasn != r2tsn){
		debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
		          __FUNCTION__, tcp_ctask->exp_datasn, r2tsn);
		return ISCSI_ERR_R2TSN;
	}

	/* fill-in new R2T associated with the task */
	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);

	if (!ctask->sc || session->state != ISCSI_STATE_LOGGED_IN) {
		iscsi_conn_printk(KERN_INFO, conn,
				  "dropping R2T itt %d in recovery.\n",
				  ctask->itt);
		return 0;
	}

	rc = __kfifo_get(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
	BUG_ON(!rc);

	r2t->exp_statsn = rhdr->statsn;
	r2t->data_length = be32_to_cpu(rhdr->data_length);
	if (r2t->data_length == 0) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with zero data len\n");
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		return ISCSI_ERR_DATALEN;
	}

	if (r2t->data_length > session->max_burst)
		debug_scsi("invalid R2T with data len %u and max burst %u."
			   "Attempting to execute request.\n",
			    r2t->data_length, session->max_burst);

	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
	if (r2t->data_offset + r2t->data_length > scsi_out(ctask->sc)->length) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with data len %u at offset %u "
				  "and total length %d\n", r2t->data_length,
				  r2t->data_offset, scsi_out(ctask->sc)->length);
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		return ISCSI_ERR_DATALEN;
	}

	r2t->ttt = rhdr->ttt; /* no flip */
	r2t->solicit_datasn = 0;

	iscsi_solicit_data_init(conn, ctask, r2t);

	tcp_ctask->exp_datasn = r2tsn + 1;
	__kfifo_put(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*));
	conn->r2t_pdus_cnt++;

	iscsi_requeue_ctask(ctask);
	return 0;
}

/*
 * Handle incoming reply to DataIn command
 */
static int
iscsi_tcp_process_data_in(struct iscsi_tcp_conn *tcp_conn,
			  struct iscsi_segment *segment)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr = tcp_conn->in.hdr;
	int rc;

	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
		return ISCSI_ERR_DATA_DGST;

	/* check for non-exceptional status */
	if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
		rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
		if (rc)
			return rc;
	}

	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

/**
 * iscsi_tcp_hdr_dissect - process PDU header
 * @conn: iSCSI connection
 * @hdr: PDU header
 *
 * This function analyzes the header of the PDU received,
 * and performs several sanity checks. If the PDU is accompanied
 * by data, the receive buffer is set up to copy the incoming data
 * to the correct location.
 */
static int
iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
{
	int rc = 0, opcode, ahslen;
	struct iscsi_session *session = conn->session;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_cmd_task *ctask;
	uint32_t itt;

	/* verify PDU length */
	tcp_conn->in.datalen = ntoh24(hdr->dlength);
	if (tcp_conn->in.datalen > conn->max_recv_dlength) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "iscsi_tcp: datalen %d > %d\n",
				  tcp_conn->in.datalen, conn->max_recv_dlength);
		return ISCSI_ERR_DATALEN;
	}

	/* Additional header segments. So far, we don't
	 * process additional headers.
	 */
	ahslen = hdr->hlength << 2;

	opcode = hdr->opcode & ISCSI_OPCODE_MASK;
	/* verify itt (itt encoding: age+cid+itt) */
	rc = iscsi_verify_itt(conn, hdr, &itt);
	if (rc)
		return rc;

	debug_tcp("opcode 0x%x ahslen %d datalen %d\n",
		  opcode, ahslen, tcp_conn->in.datalen);

	switch(opcode) {
	case ISCSI_OP_SCSI_DATA_IN:
		ctask = session->cmds[itt];
		spin_lock(&conn->session->lock);
		rc = iscsi_data_rsp(conn, ctask);
		spin_unlock(&conn->session->lock);
		if (rc)
			return rc;
		if (tcp_conn->in.datalen) {
			struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
			struct hash_desc *rx_hash = NULL;
			struct scsi_data_buffer *sdb = scsi_in(ctask->sc);

			/*
			 * Setup copy of Data-In into the Scsi_Cmnd
			 * Scatterlist case:
			 * We set up the iscsi_segment to point to the next
			 * scatterlist entry to copy to. As we go along,
			 * we move on to the next scatterlist entry and
			 * update the digest per-entry.
			 */
			if (conn->datadgst_en)
				rx_hash = &tcp_conn->rx_hash;

			debug_tcp("iscsi_tcp_begin_data_in(%p, offset=%d, "
				  "datalen=%d)\n", tcp_conn,
				  tcp_ctask->data_offset,
				  tcp_conn->in.datalen);
			return iscsi_segment_seek_sg(&tcp_conn->in.segment,
						     sdb->table.sgl,
						     sdb->table.nents,
						     tcp_ctask->data_offset,
						     tcp_conn->in.datalen,
						     iscsi_tcp_process_data_in,
						     rx_hash);
		}
		/* fall through */
	case ISCSI_OP_SCSI_CMD_RSP:
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
		break;
	case ISCSI_OP_R2T:
		ctask = session->cmds[itt];
		if (ahslen)
			rc = ISCSI_ERR_AHSLEN;
		else if (ctask->sc->sc_data_direction == DMA_TO_DEVICE) {
			spin_lock(&session->lock);
			rc = iscsi_r2t_rsp(conn, ctask);
			spin_unlock(&session->lock);
		} else
			rc = ISCSI_ERR_PROTO;
		break;
	case ISCSI_OP_LOGIN_RSP:
	case ISCSI_OP_TEXT_RSP:
	case ISCSI_OP_REJECT:
	case ISCSI_OP_ASYNC_EVENT:
		/*
		 * It is possible that we could get a PDU with a buffer larger
		 * than 8K, but there are no targets that currently do this.
		 * For now we fail until we find a vendor that needs it
		 */
		if (ISCSI_DEF_MAX_RECV_SEG_LEN < tcp_conn->in.datalen) {
			iscsi_conn_printk(KERN_ERR, conn,
					  "iscsi_tcp: received buffer of "
					  "len %u but conn buffer is only %u "
					  "(opcode %0x)\n",
					  tcp_conn->in.datalen,
					  ISCSI_DEF_MAX_RECV_SEG_LEN, opcode);
			rc = ISCSI_ERR_PROTO;
			break;
		}

		/* If there's data coming in with the response,
		 * receive it to the connection's buffer.
		 */
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
	/* fall through */
	case ISCSI_OP_LOGOUT_RSP:
	case ISCSI_OP_NOOP_IN:
	case ISCSI_OP_SCSI_TMFUNC_RSP:
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
		break;
	default:
		rc = ISCSI_ERR_BAD_OPCODE;
		break;
	}

	if (rc == 0) {
		/* Anything that comes with data should have
		 * been handled above. */
		if (tcp_conn->in.datalen)
			return ISCSI_ERR_PROTO;
		iscsi_tcp_hdr_recv_prep(tcp_conn);
	}

	return rc;
}

/**
 * iscsi_tcp_hdr_recv_done - process PDU header
 *
 * This is the callback invoked when the PDU header has
 * been received. If the header is followed by additional
 * header segments, we go back for more data.
 */
static int
iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
			struct iscsi_segment *segment)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr;

	/* Check if there are additional header segments
	 * *prior* to computing the digest, because we
	 * may need to go back to the caller for more.
	 */
	hdr = (struct iscsi_hdr *) tcp_conn->in.hdr_buf;
	if (segment->copied == sizeof(struct iscsi_hdr) && hdr->hlength) {
		/* Bump the header length - the caller will
		 * just loop around and get the AHS for us, and
		 * call again. */
		unsigned int ahslen = hdr->hlength << 2;

		/* Make sure we don't overflow */
		if (sizeof(*hdr) + ahslen > sizeof(tcp_conn->in.hdr_buf))
			return ISCSI_ERR_AHSLEN;

		segment->total_size += ahslen;
		segment->size += ahslen;
		return 0;
	}

	/* We're done processing the header. See if we're doing
	 * header digests; if so, set up the recv_digest buffer
	 * and go back for more. */
	if (conn->hdrdgst_en) {
		if (segment->digest_len == 0) {
			iscsi_tcp_segment_splice_digest(segment,
							segment->recv_digest);
			return 0;
		}
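		/* The header digest is computed over the BHS plus any AHS,
		 * i.e. everything received so far except the digest itself. */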
		iscsi_tcp_dgst_header(&tcp_conn->rx_hash, hdr,
				      segment->total_copied - ISCSI_DIGEST_SIZE,
				      segment->digest);

		if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
			return ISCSI_ERR_HDR_DGST;
	}

	tcp_conn->in.hdr = hdr;
	return iscsi_tcp_hdr_dissect(conn, hdr);
}

/**
 * iscsi_tcp_recv - TCP receive in sendfile fashion
 * @rd_desc: read descriptor
 * @skb: socket buffer
 * @offset: offset in skb
 * @len: skb->len - offset
 **/
static int
iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
	       unsigned int offset, size_t len)
{
	struct iscsi_conn *conn = rd_desc->arg.data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->in.segment;
	struct skb_seq_state seq;
	unsigned int consumed = 0;
	int rc = 0;

	debug_tcp("in %d bytes\n", skb->len - offset);

	if (unlikely(conn->suspend_rx)) {
		debug_tcp("conn %d Rx suspended!\n", conn->id);
		return 0;
	}

	skb_prepare_seq_read(skb, offset, skb->len, &seq);
	while (1) {
		unsigned int avail;
		const u8 *ptr;

		avail = skb_seq_read(consumed, &ptr, &seq);
		if (avail == 0) {
			debug_tcp("no more data avail. Consumed %d\n",
				  consumed);
			break;
		}
		BUG_ON(segment->copied >= segment->size);

		debug_tcp("skb %p ptr=%p avail=%u\n", skb, ptr, avail);
		rc = iscsi_tcp_segment_recv(tcp_conn, segment, ptr, avail);
		BUG_ON(rc == 0);
		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			debug_tcp("segment done\n");
			rc = segment->done(tcp_conn, segment);
			if (rc != 0) {
				skb_abort_seq_read(&seq);
				goto error;
			}

			/* The done() function sets up the
			 * next segment. */
		}
	}
	skb_abort_seq_read(&seq);
	conn->rxdata_octets += consumed;
	return consumed;

error:
	debug_tcp("Error receiving PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	return 0;
}

static void
iscsi_tcp_data_ready(struct sock *sk, int flag)
{
	struct iscsi_conn *conn = sk->sk_user_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	read_descriptor_t rd_desc;

	read_lock(&sk->sk_callback_lock);

	/*
	 * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
	 * We set count to 1 because we want the network layer to
	 * hand us all the skbs that are available. iscsi_tcp_recv
	 * handles pdus that cross buffers or pdus that still need data.
	 */
	rd_desc.arg.data = conn;
	rd_desc.count = 1;
	tcp_read_sock(sk, &rd_desc, iscsi_tcp_recv);

	read_unlock(&sk->sk_callback_lock);

	/* If we had to (atomically) map a highmem page,
	 * unmap it now. */
	iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
}

static void
iscsi_tcp_state_change(struct sock *sk)
{
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_conn *conn;
	struct iscsi_session *session;
	void (*old_state_change)(struct sock *);

	read_lock(&sk->sk_callback_lock);

	conn = (struct iscsi_conn*)sk->sk_user_data;
	session = conn->session;

	if ((sk->sk_state == TCP_CLOSE_WAIT ||
	     sk->sk_state == TCP_CLOSE) &&
	    !atomic_read(&sk->sk_rmem_alloc)) {
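		/* The TCP connection has been closed and no received data
		 * is pending, so escalate to an iSCSI connection failure
		 * and let recovery take over. */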
		debug_tcp("iscsi_tcp_state_change: TCP_CLOSE|TCP_CLOSE_WAIT\n");
		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	}

	tcp_conn = conn->dd_data;
	old_state_change = tcp_conn->old_state_change;

	read_unlock(&sk->sk_callback_lock);

	old_state_change(sk);
}

/**
 * iscsi_write_space - Called when more output buffer space is available
 * @sk: socket space is available for
 **/
static void
iscsi_write_space(struct sock *sk)
{
	struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	tcp_conn->old_write_space(sk);
	debug_tcp("iscsi_write_space: cid %d\n", conn->id);
	scsi_queue_work(conn->session->host, &conn->xmitwork);
}

static void
iscsi_conn_set_callbacks(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct sock *sk = tcp_conn->sock->sk;

	/* assign new callbacks */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = conn;
	tcp_conn->old_data_ready = sk->sk_data_ready;
	tcp_conn->old_state_change = sk->sk_state_change;
	tcp_conn->old_write_space = sk->sk_write_space;
	sk->sk_data_ready = iscsi_tcp_data_ready;
	sk->sk_state_change = iscsi_tcp_state_change;
	sk->sk_write_space = iscsi_write_space;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void
iscsi_conn_restore_callbacks(struct iscsi_tcp_conn *tcp_conn)
{
	struct sock *sk = tcp_conn->sock->sk;

	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data    = NULL;
	sk->sk_data_ready   = tcp_conn->old_data_ready;
	sk->sk_state_change = tcp_conn->old_state_change;
	sk->sk_write_space  = tcp_conn->old_write_space;
	sk->sk_no_check	 = 0;
	write_unlock_bh(&sk->sk_callback_lock);
}

/**
 * iscsi_xmit - TCP transmit
 **/
static int
iscsi_xmit(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->out.segment;
	unsigned int consumed = 0;
	int rc = 0;

	while (1) {
		rc = iscsi_tcp_xmit_segment(tcp_conn, segment);
		if (rc < 0)
			goto error;
		if (rc == 0)
			break;

		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			if (segment->done != NULL) {
				rc = segment->done(tcp_conn, segment);
				if (rc < 0)
					goto error;
			}
		}
	}

	debug_tcp("xmit %d bytes\n", consumed);

	conn->txdata_octets += consumed;
	return consumed;

error:
	/* Transmit error. We could initiate error recovery
	 * here. */
	debug_tcp("Error sending PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	return rc;
}

/**
 * iscsi_tcp_xmit_qlen - return the number of bytes queued for xmit
 */
static inline int
iscsi_tcp_xmit_qlen(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->out.segment;

	return segment->total_copied - segment->total_size;
}

static inline int
iscsi_tcp_flush(struct iscsi_conn *conn)
{
	int rc;

	while (iscsi_tcp_xmit_qlen(conn)) {
		rc = iscsi_xmit(conn);
		if (rc == 0)
			return -EAGAIN;
		if (rc < 0)
			return rc;
	}

	return 0;
}

/*
 * This is called when we're done sending the header.
 * Simply copy the data_segment to the send segment, and return.
 */
static int
iscsi_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn,
			struct iscsi_segment *segment)
{
	tcp_conn->out.segment = tcp_conn->out.data_segment;
	debug_tcp("Header done. Next segment size %u total_size %u\n",
		  tcp_conn->out.segment.size, tcp_conn->out.segment.total_size);
	return 0;
}

static void
iscsi_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr, size_t hdrlen)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	debug_tcp("%s(%p%s)\n", __FUNCTION__, tcp_conn,
			conn->hdrdgst_en? ", digest enabled" : "");

	/* Clear the data segment - needs to be filled in by the
	 * caller using iscsi_tcp_send_data_prep() */
	memset(&tcp_conn->out.data_segment, 0, sizeof(struct iscsi_segment));

	/* If header digest is enabled, compute the CRC and
	 * place the digest into the same buffer. We make
	 * sure that both iscsi_tcp_ctask and mtask have
	 * sufficient room.
	 */
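	/* (The digest appended here is the 4-byte CRC32C header digest
	 * defined by the iSCSI spec; it is sent on the wire immediately
	 * after the header bytes it covers.) */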
	if (conn->hdrdgst_en) {
		iscsi_tcp_dgst_header(&tcp_conn->tx_hash, hdr, hdrlen,
				      hdr + hdrlen);
		hdrlen += ISCSI_DIGEST_SIZE;
	}

	/* Remember header pointer for later, when we need
	 * to decide whether there's a payload to go along
	 * with the header. */
	tcp_conn->out.hdr = hdr;

	iscsi_segment_init_linear(&tcp_conn->out.segment, hdr, hdrlen,
				iscsi_tcp_send_hdr_done, NULL);
}

/*
 * Prepare the send buffer for the payload data.
 * Padding and checksumming will all be taken care
 * of by the iscsi_segment routines.
 */
static int
iscsi_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg,
			 unsigned int count, unsigned int offset,
			 unsigned int len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

	debug_tcp("%s(%p, offset=%d, datalen=%d%s)\n", __FUNCTION__,
			tcp_conn, offset, len,
			conn->datadgst_en? ", digest enabled" : "");

	/* Make sure the datalen matches what the caller
	   said he would send. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
		tx_hash = &tcp_conn->tx_hash;

	return iscsi_segment_seek_sg(&tcp_conn->out.data_segment,
				   sg, count, offset, len,
				   NULL, tx_hash);
}

static void
iscsi_tcp_send_linear_data_prepare(struct iscsi_conn *conn, void *data,
				   size_t len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

	debug_tcp("%s(%p, datalen=%d%s)\n", __FUNCTION__, tcp_conn, len,
		  conn->datadgst_en? ", digest enabled" : "");

	/* Make sure the datalen matches what the caller
	   said he would send. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
		tx_hash = &tcp_conn->tx_hash;

	iscsi_segment_init_linear(&tcp_conn->out.data_segment,
				data, len, NULL, tx_hash);
}

/**
 * iscsi_solicit_data_cont - initialize next Data-Out
 * @conn: iscsi connection
 * @ctask: scsi command task
 * @r2t: R2T info
 *
 * Notes:
 *	Initialize next Data-Out within this R2T sequence and continue
 *	to process next Scatter-Gather element(if any) of this SCSI command.
 *
 *	Called under connection lock.
 **/
static int
iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
			struct iscsi_r2t_info *r2t)
{
	struct iscsi_data *hdr;
	int new_offset, left;

	BUG_ON(r2t->data_length - r2t->sent < 0);
	left = r2t->data_length - r2t->sent;
	if (left == 0)
		return 0;

	hdr = &r2t->dtask.hdr;
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
	memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
	hdr->itt = ctask->hdr->itt;
	hdr->exp_statsn = r2t->exp_statsn;
	new_offset = r2t->data_offset + r2t->sent;
	hdr->offset = cpu_to_be32(new_offset);
	if (left > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
	} else {
		hton24(hdr->dlength, left);
		r2t->data_count = left;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}

	conn->dataout_pdus_cnt++;
	return 1;
}

/**
 * iscsi_tcp_ctask_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
 * @ctask: scsi command task
 **/
static int
iscsi_tcp_ctask_init(struct iscsi_cmd_task *ctask)
{
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	struct iscsi_conn *conn = ctask->conn;
	struct scsi_cmnd *sc = ctask->sc;
	int err;

	BUG_ON(__kfifo_len(tcp_ctask->r2tqueue));
	tcp_ctask->sent = 0;
	tcp_ctask->exp_datasn = 0;

	/* Prepare PDU, optionally w/ immediate data */
	debug_scsi("ctask deq [cid %d itt 0x%x imm %d unsol %d]\n",
		    conn->id, ctask->itt, ctask->imm_count,
		    ctask->unsol_count);
	iscsi_tcp_send_hdr_prep(conn, ctask->hdr, ctask->hdr_len);

	if (!ctask->imm_count)
		return 0;

	/* If we have immediate data, attach a payload */
	err = iscsi_tcp_send_data_prep(conn, scsi_out(sc)->table.sgl,
				       scsi_out(sc)->table.nents,
				       0, ctask->imm_count);
	if (err)
		return err;
	tcp_ctask->sent += ctask->imm_count;
	ctask->imm_count = 0;
	return 0;
}

/**
 * iscsi_tcp_mtask_xmit - xmit management(immediate) task
 * @conn: iscsi connection
 * @mtask: task management task
 *
 * Notes:
 *	The function can return -EAGAIN in which case caller must
 *	call it again later, or recover. '0' return code means successful
 *	xmit.
 **/
static int
iscsi_tcp_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
{
	int rc;

	/* Flush any pending data first. */
	rc = iscsi_tcp_flush(conn);
	if (rc < 0)
		return rc;

	if (mtask->hdr->itt == RESERVED_ITT) {
		struct iscsi_session *session = conn->session;

		spin_lock_bh(&session->lock);
		iscsi_free_mgmt_task(conn, mtask);
		spin_unlock_bh(&session->lock);
	}

	return 0;
}

/*
 * iscsi_tcp_ctask_xmit - xmit normal PDU task
 * @conn: iscsi connection
 * @ctask: iscsi command task
 *
 * We're expected to return 0 when everything was transmitted successfully,
 * -EAGAIN if there's still data in the queue, or != 0 for any other kind
 * of error.
 */
static int
iscsi_tcp_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
{
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	struct scsi_cmnd *sc = ctask->sc;
	struct scsi_data_buffer *sdb = scsi_out(sc);
	int rc = 0;

flush:
	/* Flush any pending data first. */
	rc = iscsi_tcp_flush(conn);
	if (rc < 0)
		return rc;

	/* Are we done already? */
	if (sc->sc_data_direction != DMA_TO_DEVICE)
		return 0;

	if (ctask->unsol_count != 0) {
		struct iscsi_data *hdr = &tcp_ctask->unsol_dtask.hdr;

		/* Prepare a header for the unsolicited PDU.
		 * The amount of data we want to send will be
		 * in ctask->data_count.
		 * FIXME: return the data count instead.
		 */
		iscsi_prep_unsolicit_data_pdu(ctask, hdr);

		debug_tcp("unsol dout [itt 0x%x doff %d dlen %d]\n",
				ctask->itt, tcp_ctask->sent, ctask->data_count);

		iscsi_tcp_send_hdr_prep(conn, hdr, sizeof(*hdr));
		rc = iscsi_tcp_send_data_prep(conn, sdb->table.sgl,
					      sdb->table.nents, tcp_ctask->sent,
					      ctask->data_count);
		if (rc)
			goto fail;
		tcp_ctask->sent += ctask->data_count;
		ctask->unsol_count -= ctask->data_count;
		goto flush;
	} else {
		struct iscsi_session *session = conn->session;
		struct iscsi_r2t_info *r2t;

		/* All unsolicited PDUs sent. Check for solicited PDUs.
		 */
		spin_lock_bh(&session->lock);
		r2t = tcp_ctask->r2t;
		if (r2t != NULL) {
			/* Continue with this R2T? */
			if (!iscsi_solicit_data_cont(conn, ctask, r2t)) {
				debug_scsi("  done with r2t %p\n", r2t);

				__kfifo_put(tcp_ctask->r2tpool.queue,
					    (void*)&r2t, sizeof(void*));
				tcp_ctask->r2t = r2t = NULL;
			}
		}

		if (r2t == NULL) {
			__kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t,
				    sizeof(void*));
			r2t = tcp_ctask->r2t;
		}
		spin_unlock_bh(&session->lock);

		/* Waiting for more R2Ts to arrive. */
		if (r2t == NULL) {
			debug_tcp("no R2Ts yet\n");
			return 0;
		}

		debug_scsi("sol dout %p [dsn %d itt 0x%x doff %d dlen %d]\n",
			r2t, r2t->solicit_datasn - 1, ctask->itt,
			r2t->data_offset + r2t->sent, r2t->data_count);

		iscsi_tcp_send_hdr_prep(conn, &r2t->dtask.hdr,
					sizeof(struct iscsi_hdr));

		rc = iscsi_tcp_send_data_prep(conn, sdb->table.sgl,
					      sdb->table.nents,
					      r2t->data_offset + r2t->sent,
					      r2t->data_count);
		if (rc)
			goto fail;
		tcp_ctask->sent += r2t->data_count;
		r2t->sent += r2t->data_count;
		goto flush;
	}
	return 0;
fail:
	iscsi_conn_failure(conn, rc);
	return -EIO;
}

static struct iscsi_cls_conn *
iscsi_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
{
	struct iscsi_conn *conn;
	struct iscsi_cls_conn *cls_conn;
	struct iscsi_tcp_conn *tcp_conn;

	cls_conn = iscsi_conn_setup(cls_session, conn_idx);
	if (!cls_conn)
		return NULL;
	conn = cls_conn->dd_data;
	/*
	 * due to strange issues with iser these are not set
	 * in iscsi_conn_setup
	 */
	conn->max_recv_dlength = ISCSI_DEF_MAX_RECV_SEG_LEN;

	tcp_conn = kzalloc(sizeof(*tcp_conn), GFP_KERNEL);
	if (!tcp_conn)
		goto tcp_conn_alloc_fail;

	conn->dd_data = tcp_conn;
	tcp_conn->iscsi_conn = conn;

	tcp_conn->tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						  CRYPTO_ALG_ASYNC);
	tcp_conn->tx_hash.flags = 0;
	if (IS_ERR(tcp_conn->tx_hash.tfm))
		goto free_tcp_conn;

	tcp_conn->rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						  CRYPTO_ALG_ASYNC);
	tcp_conn->rx_hash.flags = 0;
	if (IS_ERR(tcp_conn->rx_hash.tfm))
		goto free_tx_tfm;

	return cls_conn;

free_tx_tfm:
	crypto_free_hash(tcp_conn->tx_hash.tfm);
free_tcp_conn:
	iscsi_conn_printk(KERN_ERR, conn,
			  "Could not create connection due to crc32c "
			  "loading error. Make sure the crc32c "
			  "module is built as a module or into the "
			  "kernel\n");
	kfree(tcp_conn);
tcp_conn_alloc_fail:
	iscsi_conn_teardown(cls_conn);
	return NULL;
}

static void
iscsi_tcp_release_conn(struct iscsi_conn *conn)
{
	struct iscsi_session *session = conn->session;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct socket *sock = tcp_conn->sock;

	if (!sock)
		return;

	sock_hold(sock->sk);
	iscsi_conn_restore_callbacks(tcp_conn);
	sock_put(sock->sk);

	spin_lock_bh(&session->lock);
	tcp_conn->sock = NULL;
	conn->recv_lock = NULL;
	spin_unlock_bh(&session->lock);
	sockfd_put(sock);
}

static void
iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	iscsi_tcp_release_conn(conn);
	iscsi_conn_teardown(cls_conn);

	if (tcp_conn->tx_hash.tfm)
		crypto_free_hash(tcp_conn->tx_hash.tfm);
	if (tcp_conn->rx_hash.tfm)
		crypto_free_hash(tcp_conn->rx_hash.tfm);

	kfree(tcp_conn);
}

static void
iscsi_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
	struct iscsi_conn *conn = cls_conn->dd_data;

	iscsi_conn_stop(cls_conn, flag);
	iscsi_tcp_release_conn(conn);
}

static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
			      char *buf, int *port,
			      int (*getname)(struct socket *, struct sockaddr *,
					int *addrlen))
{
	struct sockaddr_storage *addr;
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	int rc = 0, len;

	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
	if (!addr)
		return -ENOMEM;

	if (getname(sock, (struct sockaddr *) addr, &len)) {
		rc = -ENODEV;
		goto free_addr;
	}

	switch (addr->ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
		*port = be16_to_cpu(sin->sin_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, NIP6_FMT, NIP6(sin6->sin6_addr));
		*port = be16_to_cpu(sin6->sin6_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	}
free_addr:
	kfree(addr);
	return rc;
}

static int
iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
		    struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
		    int is_leading)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
	struct iscsi_host *ihost = shost_priv(shost);
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct sock *sk;
	struct socket *sock;
	int err;

	/* lookup for existing socket */
	sock = sockfd_lookup((int)transport_eph, &err);
	if (!sock) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "sockfd_lookup failed %d\n", err);
		return -EEXIST;
	}
	/*
	 * copy these values now because if we drop the session
	 * userspace may still want to query the values since we will
	 * be using them for the reconnect
	 */
	err = iscsi_tcp_get_addr(conn, sock, conn->portal_address,
				 &conn->portal_port, kernel_getpeername);
	if (err)
		goto free_socket;

	err = iscsi_tcp_get_addr(conn, sock, ihost->local_address,
				&ihost->local_port, kernel_getsockname);
	if (err)
		goto free_socket;

	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
	if (err)
		goto free_socket;

	/* bind iSCSI connection and socket */
	tcp_conn->sock = sock;

	/* setup Socket parameters */
	sk = sock->sk;
	sk->sk_reuse = 1;
	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
	sk->sk_allocation = GFP_ATOMIC;

	/* FIXME: disable Nagle's algorithm */
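	/* (i.e. set TCP_NODELAY on the socket so that small PDUs are not
	 * held back waiting to be coalesced.) */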

	/*
	 * Intercept TCP callbacks for sendfile like receive
	 * processing.
	 */
	conn->recv_lock = &sk->sk_callback_lock;
	iscsi_conn_set_callbacks(conn);
	tcp_conn->sendpage = tcp_conn->sock->ops->sendpage;
	/*
	 * set receive state machine into initial state
	 */
	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;

free_socket:
	sockfd_put(sock);
	return err;
}

/* called with host lock */
static void
iscsi_tcp_mtask_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
{
	debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id, mtask->itt);

	/* Prepare PDU, optionally w/ immediate data */
	iscsi_tcp_send_hdr_prep(conn, mtask->hdr, sizeof(*mtask->hdr));

	/* If we have immediate data, attach a payload */
	if (mtask->data_count)
		iscsi_tcp_send_linear_data_prepare(conn, mtask->data,
						   mtask->data_count);
}

static int
iscsi_r2tpool_alloc(struct iscsi_session *session)
{
	int i;
	int cmd_i;

	/*
	 * initialize per-task: R2T pool and xmit queue
	 */
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
	        struct iscsi_cmd_task *ctask = session->cmds[cmd_i];
		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;

		/*
		 * pre-allocate 4x as many r2ts to handle the race where
		 * the target acks DataOut faster than our data_xmit() queues
		 * can replenish the r2tqueue.
		 */

		/* R2T pool */
		if (iscsi_pool_init(&tcp_ctask->r2tpool, session->max_r2t * 4, NULL,
				    sizeof(struct iscsi_r2t_info))) {
			goto r2t_alloc_fail;
		}

		/* R2T xmit queue */
		tcp_ctask->r2tqueue = kfifo_alloc(
		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL);
		if (tcp_ctask->r2tqueue == ERR_PTR(-ENOMEM)) {
			iscsi_pool_free(&tcp_ctask->r2tpool);
			goto r2t_alloc_fail;
		}
	}

	return 0;

r2t_alloc_fail:
	for (i = 0; i < cmd_i; i++) {
		struct iscsi_cmd_task *ctask = session->cmds[i];
		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;

		kfifo_free(tcp_ctask->r2tqueue);
		iscsi_pool_free(&tcp_ctask->r2tpool);
	}
	return -ENOMEM;
}

static void
iscsi_r2tpool_free(struct iscsi_session *session)
{
	int i;

	for (i = 0; i < session->cmds_max; i++) {
		struct iscsi_cmd_task *ctask = session->cmds[i];
		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;

		kfifo_free(tcp_ctask->r2tqueue);
		iscsi_pool_free(&tcp_ctask->r2tpool);
1766 1767 1768 1769
	}
}

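/*
 * Set a connection parameter.  Most parameters are simply passed on to
 * libiscsi; the TCP-specific cases are data digests (which force the
 * slow sock_no_sendpage path) and MaxOutstandingR2T (which requires the
 * R2T pools to be reallocated).
 */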
static int
iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
		     char *buf, int buflen)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_session *session = conn->session;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	int value;

	switch(param) {
	case ISCSI_PARAM_HDRDGST_EN:
		iscsi_set_param(cls_conn, param, buf, buflen);
		break;
	case ISCSI_PARAM_DATADGST_EN:
		iscsi_set_param(cls_conn, param, buf, buflen);
		tcp_conn->sendpage = conn->datadgst_en ?
			sock_no_sendpage : tcp_conn->sock->ops->sendpage;
		break;
	case ISCSI_PARAM_MAX_R2T:
		sscanf(buf, "%d", &value);
		if (value <= 0 || !is_power_of_2(value))
			return -EINVAL;
		if (session->max_r2t == value)
			break;
		iscsi_r2tpool_free(session);
		iscsi_set_param(cls_conn, param, buf, buflen);
		if (iscsi_r2tpool_alloc(session))
			return -ENOMEM;
		break;
	default:
		return iscsi_set_param(cls_conn, param, buf, buflen);
	}

	return 0;
}

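/*
 * Report a connection parameter.  The portal address and port recorded
 * at bind time are returned under the session lock so they remain
 * available for reconnect; everything else is answered by libiscsi.
 */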
static int
iscsi_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
			 enum iscsi_param param, char *buf)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	int len;

	switch(param) {
	case ISCSI_PARAM_CONN_PORT:
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%hu\n", conn->portal_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	case ISCSI_PARAM_CONN_ADDRESS:
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%s\n", conn->portal_address);
		spin_unlock_bh(&conn->session->lock);
		break;
	default:
		return iscsi_conn_get_param(cls_conn, param, buf);
	}

	return len;
}

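/*
 * Fill in the iSCSI MIB statistics for this connection plus a few
 * driver-specific counters (sendpage failures, discontiguous headers
 * received, aborts) reported as custom entries.
 */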
static void
iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	stats->txdata_octets = conn->txdata_octets;
	stats->rxdata_octets = conn->rxdata_octets;
	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
	stats->dataout_pdus = conn->dataout_pdus_cnt;
	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
	stats->datain_pdus = conn->datain_pdus_cnt;
	stats->r2t_pdus = conn->r2t_pdus_cnt;
	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
	stats->custom_length = 3;
	strcpy(stats->custom[0].desc, "tx_sendpage_failures");
	stats->custom[0].value = tcp_conn->sendpage_failures_cnt;
	strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
	stats->custom[1].value = tcp_conn->discontiguous_hdr_cnt;
	strcpy(stats->custom[2].desc, "eh_abort_cnt");
	stats->custom[2].value = conn->eh_abort_cnt;
}

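/*
 * Create a session for the software iSCSI initiator.  A pre-existing
 * scsi_host is rejected; the driver allocates and registers its own
 * host, sets up the session, points every command and management task
 * at its TCP PDU header storage, and allocates the R2T pools.
 */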
static struct iscsi_cls_session *
iscsi_tcp_session_create(struct Scsi_Host *shost, uint16_t cmds_max,
			 uint16_t qdepth, uint32_t initial_cmdsn,
			 uint32_t *hostno)
{
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
	int cmd_i;

	if (shost) {
		printk(KERN_ERR "iscsi_tcp: invalid shost %d.\n",
		       shost->host_no);
		return NULL;
	}

	shost = scsi_host_alloc(&iscsi_sht, sizeof(struct iscsi_host));
	if (!shost)
		return NULL;
	shost->transportt = iscsi_tcp_scsi_transport;
	shost->max_lun = iscsi_max_lun;
	shost->max_id = 0;
	shost->max_channel = 0;
	shost->max_cmd_len = 16;

	iscsi_host_setup(shost, qdepth);

	if (scsi_add_host(shost, NULL))
		goto free_host;
	*hostno = shost->host_no;

	cls_session = iscsi_session_setup(&iscsi_tcp_transport, shost, cmds_max,
					  sizeof(struct iscsi_tcp_cmd_task),
					  sizeof(struct iscsi_tcp_mgmt_task),
					  initial_cmdsn);
	if (!cls_session)
		goto remove_host;
	session = cls_session->dd_data;

	shost->can_queue = session->cmds_max;
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
		struct iscsi_cmd_task *ctask = session->cmds[cmd_i];
		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;

		ctask->hdr = &tcp_ctask->hdr.cmd_hdr;
		ctask->hdr_max = sizeof(tcp_ctask->hdr) - ISCSI_DIGEST_SIZE;
	}

	for (cmd_i = 0; cmd_i < session->mgmtpool_max; cmd_i++) {
		struct iscsi_mgmt_task *mtask = session->mgmt_cmds[cmd_i];
		struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data;

		mtask->hdr = (struct iscsi_hdr *) &tcp_mtask->hdr;
	}

	if (iscsi_r2tpool_alloc(session))
		goto remove_session;
	return cls_session;

remove_session:
	iscsi_session_teardown(cls_session);
remove_host:
	scsi_remove_host(shost);
free_host:
	iscsi_host_teardown(shost);
	scsi_host_put(shost);
	return NULL;
}

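/*
 * Tear down in the reverse order of iscsi_tcp_session_create(): free
 * the R2T pools, destroy the session, then remove and release the
 * scsi_host.
 */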
static void iscsi_tcp_session_destroy(struct iscsi_cls_session *cls_session)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);

	iscsi_r2tpool_free(cls_session->dd_data);
	iscsi_session_teardown(cls_session);

	scsi_remove_host(shost);
	iscsi_host_teardown(shost);
	scsi_host_put(shost);
}

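/*
 * Data is moved through the socket layer rather than DMAed directly,
 * so the request queue needs neither bounce buffering nor any special
 * DMA alignment.
 */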
static int iscsi_tcp_slave_configure(struct scsi_device *sdev)
{
	blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_ANY);
	blk_queue_dma_alignment(sdev->request_queue, 0);
	return 0;
}

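/* SCSI host template; queueing and error handling come from libiscsi. */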
static struct scsi_host_template iscsi_sht = {
	.module			= THIS_MODULE,
	.name			= "iSCSI Initiator over TCP/IP",
	.queuecommand           = iscsi_queuecommand,
	.change_queue_depth	= iscsi_change_queue_depth,
	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
	.sg_tablesize		= 4096,
	.max_sectors		= 0xFFFF,
	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
	.eh_abort_handler       = iscsi_eh_abort,
	.eh_device_reset_handler= iscsi_eh_device_reset,
	.eh_host_reset_handler	= iscsi_eh_host_reset,
	.use_clustering         = DISABLE_CLUSTERING,
	.slave_configure        = iscsi_tcp_slave_configure,
	.proc_name		= "iscsi_tcp",
	.this_id		= -1,
};

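/*
 * iSCSI transport template: advertises the digest, multi-R2T and
 * recovery capabilities of this driver and wires the TCP-specific
 * hooks above into the generic libiscsi/transport-class framework.
 */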
static struct iscsi_transport iscsi_tcp_transport = {
	.owner			= THIS_MODULE,
	.name			= "tcp",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
				  | CAP_DATADGST,
	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
				  ISCSI_MAX_XMIT_DLENGTH |
				  ISCSI_HDRDGST_EN |
				  ISCSI_DATADGST_EN |
				  ISCSI_INITIAL_R2T_EN |
				  ISCSI_MAX_R2T |
				  ISCSI_IMM_DATA_EN |
				  ISCSI_FIRST_BURST |
				  ISCSI_MAX_BURST |
				  ISCSI_PDU_INORDER_EN |
				  ISCSI_DATASEQ_INORDER_EN |
				  ISCSI_ERL |
				  ISCSI_CONN_PORT |
				  ISCSI_CONN_ADDRESS |
				  ISCSI_EXP_STATSN |
				  ISCSI_PERSISTENT_PORT |
				  ISCSI_PERSISTENT_ADDRESS |
				  ISCSI_TARGET_NAME | ISCSI_TPGT |
				  ISCSI_USERNAME | ISCSI_PASSWORD |
				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
				  ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
				  ISCSI_LU_RESET_TMO |
				  ISCSI_PING_TMO | ISCSI_RECV_TMO,
	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
				  ISCSI_HOST_INITIATOR_NAME |
				  ISCSI_HOST_NETDEV_NAME,
	.conndata_size		= sizeof(struct iscsi_conn),
	.sessiondata_size	= sizeof(struct iscsi_session),
	/* session management */
	.create_session		= iscsi_tcp_session_create,
	.destroy_session	= iscsi_tcp_session_destroy,
	/* connection management */
	.create_conn		= iscsi_tcp_conn_create,
	.bind_conn		= iscsi_tcp_conn_bind,
	.destroy_conn		= iscsi_tcp_conn_destroy,
	.set_param		= iscsi_conn_set_param,
	.get_conn_param		= iscsi_tcp_conn_get_param,
	.get_session_param	= iscsi_session_get_param,
	.start_conn		= iscsi_conn_start,
	.stop_conn		= iscsi_tcp_conn_stop,
	/* iscsi host params */
	.get_host_param		= iscsi_host_get_param,
	.set_host_param		= iscsi_host_set_param,
	/* IO */
	.send_pdu		= iscsi_conn_send_pdu,
	.get_stats		= iscsi_conn_get_stats,
	.init_cmd_task		= iscsi_tcp_ctask_init,
	.init_mgmt_task		= iscsi_tcp_mtask_init,
	.xmit_cmd_task		= iscsi_tcp_ctask_xmit,
	.xmit_mgmt_task		= iscsi_tcp_mtask_xmit,
	.cleanup_cmd_task	= iscsi_tcp_cleanup_ctask,
	/* recovery */
	.session_recovery_timedout = iscsi_session_recovery_timedout,
};

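/*
 * Module init: validate the max_lun parameter and register this
 * transport with the iSCSI transport class.
 */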
static int __init
iscsi_tcp_init(void)
{
	if (iscsi_max_lun < 1) {
		printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
		       iscsi_max_lun);
		return -EINVAL;
	}

	iscsi_tcp_scsi_transport = iscsi_register_transport(
							&iscsi_tcp_transport);
	if (!iscsi_tcp_scsi_transport)
		return -ENODEV;

	return 0;
}

static void __exit
iscsi_tcp_exit(void)
{
	iscsi_unregister_transport(&iscsi_tcp_transport);
}

module_init(iscsi_tcp_init);
module_exit(iscsi_tcp_exit);