/*
 * iSCSI Initiator over TCP/IP Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
 * Copyright (C) 2005 - 2006 Mike Christie
 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
 * maintained by open-iscsi@googlegroups.com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * See the file COPYING included with this distribution for more details.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 */

#include <linux/types.h>
#include <linux/list.h>
#include <linux/inet.h>
M
Mike Christie 已提交
32
#include <linux/file.h>
33 34 35 36 37 38 39
#include <linux/blkdev.h>
#include <linux/crypto.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
#include <net/tcp.h>
#include <scsi/scsi_cmnd.h>
40
#include <scsi/scsi_device.h>
41 42 43 44 45 46 47 48 49 50
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include "iscsi_tcp.h"

MODULE_AUTHOR("Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");
MODULE_DESCRIPTION("iSCSI/TCP data-path");
MODULE_LICENSE("GPL");
O
Olaf Kirch 已提交
51
/*
 * Compile-time debug switches.
 *
 * DEBUG_TCP is explicitly undefined here, so debug_tcp() expands to
 * nothing.  DEBUG_ASSERT is defined here, so any existing BUG_ON()
 * definition is left alone; without DEBUG_ASSERT, BUG_ON() would be
 * replaced by an empty macro.
 */
#undef DEBUG_TCP
#define DEBUG_ASSERT

#if defined(DEBUG_TCP)
#define debug_tcp(fmt...) printk(KERN_INFO "tcp: " fmt)
#else
#define debug_tcp(fmt...)
#endif

#if !defined(DEBUG_ASSERT)
#if defined(BUG_ON)
#undef BUG_ON
#endif
#define BUG_ON(expr)
#endif

67 68 69 70
/*
 * File-scope transport objects.  They are only declared at this point;
 * presumably they are initialized further down in this file (not
 * visible from here -- confirm).
 */
static struct scsi_transport_template *iscsi_tcp_scsi_transport;
static struct scsi_host_template iscsi_sht;
static struct iscsi_transport iscsi_tcp_transport;

71 72 73
/*
 * Exposed as the module parameter "max_lun" with S_IRUGO (read-only)
 * permissions; defaults to 512.
 */
static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

O
Olaf Kirch 已提交
74
/*
 * Forward declaration: iscsi_tcp_hdr_recv_prep() installs this callback
 * before its definition appears further down in the file.
 */
static int
iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
			struct iscsi_segment *segment);
76

O
Olaf Kirch 已提交
77
/*
 * Scatterlist handling: inside the iscsi_segment, we
 * remember an index into the scatterlist, and set data/size
 * to the current scatterlist entry. For highmem pages, we
 * kmap as needed.
 *
 * Note that the page is unmapped when we return from
 * TCP's data_ready handler, so we may end up mapping and
 * unmapping the same page repeatedly. The whole reason
 * for this is that we shouldn't keep the page mapped
 * outside the softirq.
 */

/**
91 92 93
 * iscsi_tcp_segment_init_sg - init indicated scatterlist entry
 * @segment: the buffer object
 * @sg: scatterlist
O
Olaf Kirch 已提交
94 95
 * @offset: byte offset into that sg entry
 *
96
 * This function sets up the segment so that subsequent
O
Olaf Kirch 已提交
97 98 99 100
 * data is copied to the indicated sg entry, at the given
 * offset.
 */
static inline void
101 102
iscsi_tcp_segment_init_sg(struct iscsi_segment *segment,
			  struct scatterlist *sg, unsigned int offset)
O
Olaf Kirch 已提交
103
{
104 105 106 107 108
	segment->sg = sg;
	segment->sg_offset = offset;
	segment->size = min(sg->length - offset,
			    segment->total_size - segment->total_copied);
	segment->data = NULL;
O
Olaf Kirch 已提交
109 110 111
}

/**
112 113 114
 * iscsi_tcp_segment_map - map the current S/G page
 * @segment: iscsi_segment
 * @recv: 1 if called from recv path
O
Olaf Kirch 已提交
115 116 117 118 119 120
 *
 * We only need to possibly kmap data if scatter lists are being used,
 * because the iscsi passthrough and internal IO paths will never use high
 * mem pages.
 */
static inline void
121
iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv)
O
Olaf Kirch 已提交
122 123 124
{
	struct scatterlist *sg;

125
	if (segment->data != NULL || !segment->sg)
O
Olaf Kirch 已提交
126 127
		return;

128 129
	sg = segment->sg;
	BUG_ON(segment->sg_mapped);
O
Olaf Kirch 已提交
130
	BUG_ON(sg->length == 0);
131 132 133 134 135 136 137 138 139 140 141 142 143 144

	/*
	 * If the page count is greater than one it is ok to send
	 * to the network layer's zero copy send path. If not we
	 * have to go the slow sendmsg path. We always map for the
	 * recv path.
	 */
	if (page_count(sg_page(sg)) >= 1 && !recv)
		return;

	debug_tcp("iscsi_tcp_segment_map %s %p\n", recv ? "recv" : "xmit",
		  segment);
	segment->sg_mapped = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
	segment->data = segment->sg_mapped + sg->offset + segment->sg_offset;
O
Olaf Kirch 已提交
145 146 147
}

static inline void
148
iscsi_tcp_segment_unmap(struct iscsi_segment *segment)
O
Olaf Kirch 已提交
149
{
150 151 152 153 154 155 156
	debug_tcp("iscsi_tcp_segment_unmap %p\n", segment);

	if (segment->sg_mapped) {
		debug_tcp("iscsi_tcp_segment_unmap valid\n");
		kunmap_atomic(segment->sg_mapped, KM_SOFTIRQ0);
		segment->sg_mapped = NULL;
		segment->data = NULL;
O
Olaf Kirch 已提交
157 158 159 160 161 162 163
	}
}

/*
 * Splice the digest buffer into the buffer
 */
static inline void
164
iscsi_tcp_segment_splice_digest(struct iscsi_segment *segment, void *digest)
O
Olaf Kirch 已提交
165
{
166 167 168 169 170 171 172
	segment->data = digest;
	segment->digest_len = ISCSI_DIGEST_SIZE;
	segment->total_size += ISCSI_DIGEST_SIZE;
	segment->size = ISCSI_DIGEST_SIZE;
	segment->copied = 0;
	segment->sg = NULL;
	segment->hash = NULL;
O
Olaf Kirch 已提交
173 174 175
}

/**
176 177 178 179
 * iscsi_tcp_segment_done - check whether the segment is complete
 * @segment: iscsi segment to check
 * @recv: set to one of this is called from the recv path
 * @copied: number of bytes copied
O
Olaf Kirch 已提交
180
 *
181
 * Check if we're done receiving this segment. If the receive
O
Olaf Kirch 已提交
182 183 184 185 186 187 188 189
 * buffer is full but we expect more data, move on to the
 * next entry in the scatterlist.
 *
 * If the amount of data we received isn't a multiple of 4,
 * we will transparently receive the pad bytes, too.
 *
 * This function must be re-entrant.
 */
190
static inline int
191
iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv, unsigned copied)
192
{
O
Olaf Kirch 已提交
193
	static unsigned char padbuf[ISCSI_PAD_LEN];
194
	struct scatterlist sg;
195
	unsigned int pad;
O
Olaf Kirch 已提交
196

197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
	debug_tcp("copied %u %u size %u %s\n", segment->copied, copied,
		  segment->size, recv ? "recv" : "xmit");
	if (segment->hash && copied) {
		/*
		 * If a segment is kmapd we must unmap it before sending
		 * to the crypto layer since that will try to kmap it again.
		 */
		iscsi_tcp_segment_unmap(segment);

		if (!segment->data) {
			sg_init_table(&sg, 1);
			sg_set_page(&sg, sg_page(segment->sg), copied,
				    segment->copied + segment->sg_offset +
							segment->sg->offset);
		} else
			sg_init_one(&sg, segment->data + segment->copied,
				    copied);
		crypto_hash_update(segment->hash, &sg, copied);
	}

	segment->copied += copied;
	if (segment->copied < segment->size) {
		iscsi_tcp_segment_map(segment, recv);
O
Olaf Kirch 已提交
220 221
		return 0;
	}
222

223 224 225
	segment->total_copied += segment->copied;
	segment->copied = 0;
	segment->size = 0;
226

O
Olaf Kirch 已提交
227
	/* Unmap the current scatterlist page, if there is one. */
228
	iscsi_tcp_segment_unmap(segment);
O
Olaf Kirch 已提交
229 230

	/* Do we have more scatterlist entries? */
231 232 233
	debug_tcp("total copied %u total size %u\n", segment->total_copied,
		   segment->total_size);
	if (segment->total_copied < segment->total_size) {
O
Olaf Kirch 已提交
234
		/* Proceed to the next entry in the scatterlist. */
235 236 237 238
		iscsi_tcp_segment_init_sg(segment, sg_next(segment->sg),
					  0);
		iscsi_tcp_segment_map(segment, recv);
		BUG_ON(segment->size == 0);
O
Olaf Kirch 已提交
239 240 241 242
		return 0;
	}

	/* Do we need to handle padding? */
243
	pad = iscsi_padding(segment->total_copied);
244
	if (pad != 0) {
O
Olaf Kirch 已提交
245
		debug_tcp("consume %d pad bytes\n", pad);
246 247 248
		segment->total_size += pad;
		segment->size = pad;
		segment->data = padbuf;
O
Olaf Kirch 已提交
249 250 251 252
		return 0;
	}

	/*
253
	 * Set us up for transferring the data digest. hdr digest
O
Olaf Kirch 已提交
254 255
	 * is completely handled in hdr done function.
	 */
256 257 258 259 260
	if (segment->hash) {
		crypto_hash_final(segment->hash, segment->digest);
		iscsi_tcp_segment_splice_digest(segment,
				 recv ? segment->recv_digest : segment->digest);
		return 0;
O
Olaf Kirch 已提交
261
	}
262

O
Olaf Kirch 已提交
263 264
	return 1;
}
265

O
Olaf Kirch 已提交
266
/**
 * iscsi_tcp_xmit_segment - transmit segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to transmit
 *
 * This function transmits as much of the buffer as
 * the network layer will accept, and returns the number of
 * bytes transmitted.  If the very first send attempt fails with
 * anything other than -EAGAIN, that negative error is returned
 * instead.
 *
 * If CRC hashing is enabled, the function will compute the
 * hash as it goes. When the entire segment has been transmitted,
 * it will retrieve the hash value and send it as well.
 */
static int
iscsi_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment)
{
	struct socket *sock = tcp_conn->sock;
	unsigned int sent = 0;
	int r = 0;

	/* Each pass first accounts for the bytes sent by the previous one. */
	while (!iscsi_tcp_segment_done(segment, 0, r)) {
		unsigned int offset = segment->copied;
		unsigned int copy = segment->size - offset;
		int flags = 0;

		r = 0;

		/* More chunks follow this one: tell the stack. */
		if (segment->total_copied + segment->size < segment->total_size)
			flags |= MSG_MORE;

		/* Use sendpage if we can; else fall back to sendmsg */
		if (segment->data) {
			struct msghdr msg = { .msg_flags = flags };
			struct kvec iov = {
				.iov_base = segment->data + offset,
				.iov_len = copy
			};

			r = kernel_sendmsg(sock, &msg, &iov, 1, copy);
		} else {
			struct scatterlist *sg = segment->sg;

			offset += segment->sg_offset + sg->offset;
			r = tcp_conn->sendpage(sock, sg_page(sg), offset, copy,
					       flags);
		}

		if (r < 0) {
			iscsi_tcp_segment_unmap(segment);
			/* Partial progress or a full socket is not an error. */
			if (sent || r == -EAGAIN)
				break;
			return r;
		}
		sent += r;
	}
	return sent;
}

/**
 * iscsi_tcp_segment_recv - copy data to segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to copy to
O
Olaf Kirch 已提交
330 331 332 333 334 335 336 337 338 339 340 341 342 343
 * @ptr: data pointer
 * @len: amount of data available
 *
 * This function copies up to @len bytes to the
 * given buffer, and returns the number of bytes
 * consumed, which can actually be less than @len.
 *
 * If hash digest is enabled, the function will update the
 * hash while copying.
 * Combining these two operations doesn't buy us a lot (yet),
 * but in the future we could implement combined copy+crc,
 * just way we do for network layer checksums.
 */
static int
344 345 346
iscsi_tcp_segment_recv(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment, const void *ptr,
		       unsigned int len)
O
Olaf Kirch 已提交
347
{
348
	unsigned int copy = 0, copied = 0;
349

350 351 352 353 354
	while (!iscsi_tcp_segment_done(segment, 1, copy)) {
		if (copied == len) {
			debug_tcp("iscsi_tcp_segment_recv copied %d bytes\n",
				  len);
			break;
O
Olaf Kirch 已提交
355
		}
356 357 358 359

		copy = min(len - copied, segment->size - segment->copied);
		debug_tcp("iscsi_tcp_segment_recv copying %d\n", copy);
		memcpy(segment->data + segment->copied, ptr + copied, copy);
O
Olaf Kirch 已提交
360 361 362 363 364 365 366 367 368 369
		copied += copy;
	}
	return copied;
}

/*
 * Compute the digest of a PDU header: hash @hdrlen bytes starting at
 * @hdr in a single pass and store the result in @digest.
 */
static inline void
iscsi_tcp_dgst_header(struct hash_desc *hash, const void *hdr, size_t hdrlen,
		      unsigned char digest[ISCSI_DIGEST_SIZE])
{
	struct scatterlist hdr_sg;

	sg_init_one(&hdr_sg, hdr, hdrlen);
	crypto_hash_digest(hash, &hdr_sg, hdrlen, digest);
}

static inline int
iscsi_tcp_dgst_verify(struct iscsi_tcp_conn *tcp_conn,
377
		      struct iscsi_segment *segment)
O
Olaf Kirch 已提交
378
{
379
	if (!segment->digest_len)
O
Olaf Kirch 已提交
380 381
		return 1;

382 383
	if (memcmp(segment->recv_digest, segment->digest,
		   segment->digest_len)) {
O
Olaf Kirch 已提交
384 385 386 387 388 389 390 391
		debug_scsi("digest mismatch\n");
		return 0;
	}

	return 1;
}

/*
392
 * Helper function to set up segment buffer
O
Olaf Kirch 已提交
393 394
 */
static inline void
395 396
__iscsi_segment_init(struct iscsi_segment *segment, size_t size,
		     iscsi_segment_done_fn_t *done, struct hash_desc *hash)
O
Olaf Kirch 已提交
397
{
398 399 400
	memset(segment, 0, sizeof(*segment));
	segment->total_size = size;
	segment->done = done;
O
Olaf Kirch 已提交
401 402

	if (hash) {
403
		segment->hash = hash;
O
Olaf Kirch 已提交
404 405 406 407 408
		crypto_hash_init(hash);
	}
}

static inline void
409 410 411
iscsi_segment_init_linear(struct iscsi_segment *segment, void *data,
			  size_t size, iscsi_segment_done_fn_t *done,
			  struct hash_desc *hash)
O
Olaf Kirch 已提交
412
{
413 414 415
	__iscsi_segment_init(segment, size, done, hash);
	segment->data = data;
	segment->size = size;
O
Olaf Kirch 已提交
416 417 418
}

static inline int
419 420 421 422
iscsi_segment_seek_sg(struct iscsi_segment *segment,
		      struct scatterlist *sg_list, unsigned int sg_count,
		      unsigned int offset, size_t size,
		      iscsi_segment_done_fn_t *done, struct hash_desc *hash)
O
Olaf Kirch 已提交
423
{
424
	struct scatterlist *sg;
O
Olaf Kirch 已提交
425 426
	unsigned int i;

427 428 429 430 431 432 433 434
	debug_scsi("iscsi_segment_seek_sg offset %u size %llu\n",
		  offset, size);
	__iscsi_segment_init(segment, size, done, hash);
	for_each_sg(sg_list, sg, sg_count, i) {
		debug_scsi("sg %d, len %u offset %u\n", i, sg->length,
			   sg->offset);
		if (offset < sg->length) {
			iscsi_tcp_segment_init_sg(segment, sg, offset);
O
Olaf Kirch 已提交
435
			return 0;
436
		}
437
		offset -= sg->length;
438 439
	}

O
Olaf Kirch 已提交
440 441 442 443
	return ISCSI_ERR_DATA_OFFSET;
}

/**
444
 * iscsi_tcp_hdr_recv_prep - prep segment for hdr reception
O
Olaf Kirch 已提交
445 446 447 448 449 450 451 452 453 454 455
 * @tcp_conn: iscsi connection to prep for
 *
 * This function always passes NULL for the hash argument, because when this
 * function is called we do not yet know the final size of the header and want
 * to delay the digest processing until we know that.
 */
static void
iscsi_tcp_hdr_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	debug_tcp("iscsi_tcp_hdr_recv_prep(%p%s)\n", tcp_conn,
		  tcp_conn->iscsi_conn->hdrdgst_en ? ", digest enabled" : "");
456
	iscsi_segment_init_linear(&tcp_conn->in.segment,
O
Olaf Kirch 已提交
457 458 459 460 461 462 463 464 465
				tcp_conn->in.hdr_buf, sizeof(struct iscsi_hdr),
				iscsi_tcp_hdr_recv_done, NULL);
}

/*
 * Handle incoming reply to any other type of command
 */
static int
iscsi_tcp_data_recv_done(struct iscsi_tcp_conn *tcp_conn,
466
			 struct iscsi_segment *segment)
O
Olaf Kirch 已提交
467 468 469 470
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	int rc = 0;

471
	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
O
Olaf Kirch 已提交
472 473 474 475 476 477 478 479
		return ISCSI_ERR_DATA_DGST;

	rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr,
			conn->data, tcp_conn->in.datalen);
	if (rc)
		return rc;

	iscsi_tcp_hdr_recv_prep(tcp_conn);
480 481 482
	return 0;
}

O
Olaf Kirch 已提交
483 484 485 486 487 488 489 490 491
static void
iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct hash_desc *rx_hash = NULL;

	if (conn->datadgst_en)
		rx_hash = &tcp_conn->rx_hash;

492
	iscsi_segment_init_linear(&tcp_conn->in.segment,
O
Olaf Kirch 已提交
493 494 495 496
				conn->data, tcp_conn->in.datalen,
				iscsi_tcp_data_recv_done, rx_hash);
}

M
Mike Christie 已提交
497 498 499 500
/*
 * must be called with session lock
 */
static void
501
iscsi_tcp_cleanup_task(struct iscsi_conn *conn, struct iscsi_task *task)
502
{
503
	struct iscsi_tcp_task *tcp_task = task->dd_data;
504
	struct iscsi_r2t_info *r2t;
505

506 507
	/* nothing to do for mgmt tasks */
	if (!task->sc)
508 509
		return;

510 511 512
	/* flush task's r2t queues */
	while (__kfifo_get(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
513
			    sizeof(void*));
514
		debug_scsi("iscsi_tcp_cleanup_task pending r2t dropped\n");
515 516
	}

517
	r2t = tcp_task->r2t;
518
	if (r2t != NULL) {
519
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
520
			    sizeof(void*));
521
		tcp_task->r2t = NULL;
522
	}
523 524 525 526 527
}

/**
 * iscsi_data_rsp - SCSI Data-In Response processing
 * @conn: iscsi connection
528
 * @task: scsi command task
529 530
 **/
static int
531
iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
532
{
533
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
534
	struct iscsi_tcp_task *tcp_task = task->dd_data;
535
	struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
536
	struct iscsi_session *session = conn->session;
537
	struct scsi_cmnd *sc = task->sc;
538
	int datasn = be32_to_cpu(rhdr->datasn);
539
	unsigned total_in_length = scsi_in(sc)->length;
540

541
	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
542
	if (tcp_conn->in.datalen == 0)
543 544
		return 0;

545 546
	if (tcp_task->exp_datasn != datasn) {
		debug_tcp("%s: task->exp_datasn(%d) != rhdr->datasn(%d)\n",
547
		          __func__, tcp_task->exp_datasn, datasn);
548
		return ISCSI_ERR_DATASN;
549
	}
550

551
	tcp_task->exp_datasn++;
552

553 554
	tcp_task->data_offset = be32_to_cpu(rhdr->offset);
	if (tcp_task->data_offset + tcp_conn->in.datalen > total_in_length) {
555
		debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n",
556
		          __func__, tcp_task->data_offset,
557
		          tcp_conn->in.datalen, total_in_length);
558
		return ISCSI_ERR_DATA_OFFSET;
559
	}
560 561

	if (rhdr->flags & ISCSI_FLAG_DATA_STATUS) {
562
		sc->result = (DID_OK << 16) | rhdr->cmd_status;
563
		conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
564 565
		if (rhdr->flags & (ISCSI_FLAG_DATA_UNDERFLOW |
		                   ISCSI_FLAG_DATA_OVERFLOW)) {
566 567 568
			int res_count = be32_to_cpu(rhdr->residual_count);

			if (res_count > 0 &&
569
			    (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW ||
570 571
			     res_count <= total_in_length))
				scsi_in(sc)->resid = res_count;
572
			else
573 574
				sc->result = (DID_BAD_TARGET << 16) |
					rhdr->cmd_status;
575
		}
576 577 578 579 580 581 582 583 584
	}

	conn->datain_pdus_cnt++;
	return 0;
}

/**
 * iscsi_solicit_data_init - initialize first Data-Out
 * @conn: iscsi connection
585
 * @task: scsi command task
586 587 588 589 590 591 592 593 594
 * @r2t: R2T info
 *
 * Notes:
 *	Initialize first Data-Out within this R2T sequence and finds
 *	proper data_offset within this SCSI command.
 *
 *	This function is called with connection lock taken.
 **/
static void
595
iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_task *task,
596 597 598 599
			struct iscsi_r2t_info *r2t)
{
	struct iscsi_data *hdr;

600
	hdr = &r2t->dtask.hdr;
601 602 603 604 605
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
606 607
	memcpy(hdr->lun, task->hdr->lun, sizeof(hdr->lun));
	hdr->itt = task->hdr->itt;
608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626
	hdr->exp_statsn = r2t->exp_statsn;
	hdr->offset = cpu_to_be32(r2t->data_offset);
	if (r2t->data_length > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
		hdr->flags = 0;
	} else {
		hton24(hdr->dlength, r2t->data_length);
		r2t->data_count = r2t->data_length;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}
	conn->dataout_pdus_cnt++;

	r2t->sent = 0;
}

/**
 * iscsi_r2t_rsp - iSCSI R2T Response processing
 * @conn: iscsi connection
627
 * @task: scsi command task
628 629
 **/
static int
630
iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
631 632 633
{
	struct iscsi_r2t_info *r2t;
	struct iscsi_session *session = conn->session;
634
	struct iscsi_tcp_task *tcp_task = task->dd_data;
635 636
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
637 638 639
	int r2tsn = be32_to_cpu(rhdr->r2tsn);
	int rc;

640
	if (tcp_conn->in.datalen) {
641 642 643
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2t with datalen %d\n",
				  tcp_conn->in.datalen);
644
		return ISCSI_ERR_DATALEN;
645
	}
646

647 648
	if (tcp_task->exp_datasn != r2tsn){
		debug_tcp("%s: task->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
649
		          __func__, tcp_task->exp_datasn, r2tsn);
650
		return ISCSI_ERR_R2TSN;
651
	}
652

653
	/* fill-in new R2T associated with the task */
654 655
	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);

656
	if (!task->sc || session->state != ISCSI_STATE_LOGGED_IN) {
657 658
		iscsi_conn_printk(KERN_INFO, conn,
				  "dropping R2T itt %d in recovery.\n",
659
				  task->itt);
660 661
		return 0;
	}
662

663
	rc = __kfifo_get(tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
664 665 666 667
	BUG_ON(!rc);

	r2t->exp_statsn = rhdr->statsn;
	r2t->data_length = be32_to_cpu(rhdr->data_length);
668
	if (r2t->data_length == 0) {
669 670
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with zero data len\n");
671
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
672
			    sizeof(void*));
673 674 675
		return ISCSI_ERR_DATALEN;
	}

676 677 678 679 680
	if (r2t->data_length > session->max_burst)
		debug_scsi("invalid R2T with data len %u and max burst %u."
			   "Attempting to execute request.\n",
			    r2t->data_length, session->max_burst);

681
	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
682
	if (r2t->data_offset + r2t->data_length > scsi_out(task->sc)->length) {
683 684 685
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with data len %u at offset %u "
				  "and total length %d\n", r2t->data_length,
686 687
				  r2t->data_offset, scsi_out(task->sc)->length);
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
688
			    sizeof(void*));
689 690 691 692 693 694
		return ISCSI_ERR_DATALEN;
	}

	r2t->ttt = rhdr->ttt; /* no flip */
	r2t->solicit_datasn = 0;

695
	iscsi_solicit_data_init(conn, task, r2t);
696

697 698
	tcp_task->exp_datasn = r2tsn + 1;
	__kfifo_put(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
699
	conn->r2t_pdus_cnt++;
700

701
	iscsi_requeue_task(task);
702 703 704
	return 0;
}

O
Olaf Kirch 已提交
705 706 707 708 709
/*
 * Handle incoming reply to DataIn command
 */
static int
iscsi_tcp_process_data_in(struct iscsi_tcp_conn *tcp_conn,
710
			  struct iscsi_segment *segment)
O
Olaf Kirch 已提交
711 712 713 714 715
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr = tcp_conn->in.hdr;
	int rc;

716
	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
O
Olaf Kirch 已提交
717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739
		return ISCSI_ERR_DATA_DGST;

	/* check for non-exceptional status */
	if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
		rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
		if (rc)
			return rc;
	}

	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

/**
 * iscsi_tcp_hdr_dissect - process PDU header
 * @conn: iSCSI connection
 * @hdr: PDU header
 *
 * This function analyzes the header of the PDU received,
 * and performs several sanity checks. If the PDU is accompanied
 * by data, the receive buffer is set up to copy the incoming data
 * to the correct location.
 */
740
static int
O
Olaf Kirch 已提交
741
iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
742
{
743 744
	int rc = 0, opcode, ahslen;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
745
	struct iscsi_task *task;
746 747

	/* verify PDU length */
748 749
	tcp_conn->in.datalen = ntoh24(hdr->dlength);
	if (tcp_conn->in.datalen > conn->max_recv_dlength) {
750 751 752
		iscsi_conn_printk(KERN_ERR, conn,
				  "iscsi_tcp: datalen %d > %d\n",
				  tcp_conn->in.datalen, conn->max_recv_dlength);
753 754 755
		return ISCSI_ERR_DATALEN;
	}

O
Olaf Kirch 已提交
756 757 758
	/* Additional header segments. So far, we don't
	 * process additional headers.
	 */
759
	ahslen = hdr->hlength << 2;
760

761
	opcode = hdr->opcode & ISCSI_OPCODE_MASK;
762
	/* verify itt (itt encoding: age+cid+itt) */
763
	rc = iscsi_verify_itt(conn, hdr->itt);
764
	if (rc)
765
		return rc;
766

O
Olaf Kirch 已提交
767 768
	debug_tcp("opcode 0x%x ahslen %d datalen %d\n",
		  opcode, ahslen, tcp_conn->in.datalen);
769

770 771
	switch(opcode) {
	case ISCSI_OP_SCSI_DATA_IN:
772
		spin_lock(&conn->session->lock);
773 774
		task = iscsi_itt_to_ctask(conn, hdr->itt);
		if (!task)
775 776 777 778 779 780 781
			rc = ISCSI_ERR_BAD_ITT;
		else
			rc = iscsi_data_rsp(conn, task);
		if (rc) {
			spin_unlock(&conn->session->lock);
			break;
		}
782

O
Olaf Kirch 已提交
783
		if (tcp_conn->in.datalen) {
784
			struct iscsi_tcp_task *tcp_task = task->dd_data;
O
Olaf Kirch 已提交
785
			struct hash_desc *rx_hash = NULL;
786
			struct scsi_data_buffer *sdb = scsi_in(task->sc);
O
Olaf Kirch 已提交
787 788 789 790

			/*
			 * Setup copy of Data-In into the Scsi_Cmnd
			 * Scatterlist case:
791
			 * We set up the iscsi_segment to point to the next
O
Olaf Kirch 已提交
792 793 794 795 796 797 798 799 800
			 * scatterlist entry to copy to. As we go along,
			 * we move on to the next scatterlist entry and
			 * update the digest per-entry.
			 */
			if (conn->datadgst_en)
				rx_hash = &tcp_conn->rx_hash;

			debug_tcp("iscsi_tcp_begin_data_in(%p, offset=%d, "
				  "datalen=%d)\n", tcp_conn,
801
				  tcp_task->data_offset,
O
Olaf Kirch 已提交
802
				  tcp_conn->in.datalen);
803 804 805 806 807 808 809 810 811
			rc = iscsi_segment_seek_sg(&tcp_conn->in.segment,
						   sdb->table.sgl,
						   sdb->table.nents,
						   tcp_task->data_offset,
						   tcp_conn->in.datalen,
						   iscsi_tcp_process_data_in,
						   rx_hash);
			spin_unlock(&conn->session->lock);
			return rc;
O
Olaf Kirch 已提交
812
		}
813 814 815
		rc = __iscsi_complete_pdu(conn, hdr, NULL, 0);
		spin_unlock(&conn->session->lock);
		break;
816
	case ISCSI_OP_SCSI_CMD_RSP:
O
Olaf Kirch 已提交
817 818 819 820 821
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
822 823
		break;
	case ISCSI_OP_R2T:
824
		spin_lock(&conn->session->lock);
825 826
		task = iscsi_itt_to_ctask(conn, hdr->itt);
		if (!task)
827 828
			rc = ISCSI_ERR_BAD_ITT;
		else if (ahslen)
829
			rc = ISCSI_ERR_AHSLEN;
830
		else if (task->sc->sc_data_direction == DMA_TO_DEVICE)
831
			rc = iscsi_r2t_rsp(conn, task);
832
		else
833
			rc = ISCSI_ERR_PROTO;
834
		spin_unlock(&conn->session->lock);
835 836 837 838 839
		break;
	case ISCSI_OP_LOGIN_RSP:
	case ISCSI_OP_TEXT_RSP:
	case ISCSI_OP_REJECT:
	case ISCSI_OP_ASYNC_EVENT:
840 841 842 843 844
		/*
		 * It is possible that we could get a PDU with a buffer larger
		 * than 8K, but there are no targets that currently do this.
		 * For now we fail until we find a vendor that needs it
		 */
O
Olaf Kirch 已提交
845
		if (ISCSI_DEF_MAX_RECV_SEG_LEN < tcp_conn->in.datalen) {
846 847 848 849 850 851
			iscsi_conn_printk(KERN_ERR, conn,
					  "iscsi_tcp: received buffer of "
					  "len %u but conn buffer is only %u "
					  "(opcode %0x)\n",
					  tcp_conn->in.datalen,
					  ISCSI_DEF_MAX_RECV_SEG_LEN, opcode);
852 853 854 855
			rc = ISCSI_ERR_PROTO;
			break;
		}

O
Olaf Kirch 已提交
856 857 858 859 860 861 862
		/* If there's data coming in with the response,
		 * receive it to the connection's buffer.
		 */
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
863
	/* fall through */
864 865
	case ISCSI_OP_LOGOUT_RSP:
	case ISCSI_OP_NOOP_IN:
866 867 868 869 870 871 872
	case ISCSI_OP_SCSI_TMFUNC_RSP:
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
		break;
	default:
		rc = ISCSI_ERR_BAD_OPCODE;
		break;
	}
873

O
Olaf Kirch 已提交
874 875 876 877 878 879
	if (rc == 0) {
		/* Anything that comes with data should have
		 * been handled above. */
		if (tcp_conn->in.datalen)
			return ISCSI_ERR_PROTO;
		iscsi_tcp_hdr_recv_prep(tcp_conn);
880 881
	}

O
Olaf Kirch 已提交
882
	return rc;
883 884
}

O
Olaf Kirch 已提交
885 886 887 888 889 890 891 892 893
/**
 * iscsi_tcp_hdr_recv_done - process PDU header
 *
 * This is the callback invoked when the PDU header has
 * been received. If the header is followed by additional
 * header segments, we go back for more data.
 */
static int
iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
894
			struct iscsi_segment *segment)
895
{
O
Olaf Kirch 已提交
896 897
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr;
898

O
Olaf Kirch 已提交
899 900 901 902 903
	/* Check if there are additional header segments
	 * *prior* to computing the digest, because we
	 * may need to go back to the caller for more.
	 */
	hdr = (struct iscsi_hdr *) tcp_conn->in.hdr_buf;
904
	if (segment->copied == sizeof(struct iscsi_hdr) && hdr->hlength) {
O
Olaf Kirch 已提交
905 906 907 908 909 910 911 912 913
		/* Bump the header length - the caller will
		 * just loop around and get the AHS for us, and
		 * call again. */
		unsigned int ahslen = hdr->hlength << 2;

		/* Make sure we don't overflow */
		if (sizeof(*hdr) + ahslen > sizeof(tcp_conn->in.hdr_buf))
			return ISCSI_ERR_AHSLEN;

914 915
		segment->total_size += ahslen;
		segment->size += ahslen;
O
Olaf Kirch 已提交
916
		return 0;
917 918
	}

O
Olaf Kirch 已提交
919 920 921 922
	/* We're done processing the header. See if we're doing
	 * header digests; if so, set up the recv_digest buffer
	 * and go back for more. */
	if (conn->hdrdgst_en) {
923 924 925
		if (segment->digest_len == 0) {
			iscsi_tcp_segment_splice_digest(segment,
							segment->recv_digest);
O
Olaf Kirch 已提交
926
			return 0;
927
		}
O
Olaf Kirch 已提交
928
		iscsi_tcp_dgst_header(&tcp_conn->rx_hash, hdr,
929 930
				      segment->total_copied - ISCSI_DIGEST_SIZE,
				      segment->digest);
931

932
		if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
O
Olaf Kirch 已提交
933
			return ISCSI_ERR_HDR_DGST;
934
	}
O
Olaf Kirch 已提交
935 936 937

	tcp_conn->in.hdr = hdr;
	return iscsi_tcp_hdr_dissect(conn, hdr);
938 939 940
}

/**
O
Olaf Kirch 已提交
941
 * iscsi_tcp_recv - TCP receive in sendfile fashion
942 943 944 945 946 947
 * @rd_desc: read descriptor
 * @skb: socket buffer
 * @offset: offset in skb
 * @len: skb->len - offset
 **/
static int
O
Olaf Kirch 已提交
948 949
iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
	       unsigned int offset, size_t len)
950 951
{
	struct iscsi_conn *conn = rd_desc->arg.data;
952
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
953
	struct iscsi_segment *segment = &tcp_conn->in.segment;
O
Olaf Kirch 已提交
954 955 956
	struct skb_seq_state seq;
	unsigned int consumed = 0;
	int rc = 0;
957

O
Olaf Kirch 已提交
958
	debug_tcp("in %d bytes\n", skb->len - offset);
959 960 961 962 963 964

	if (unlikely(conn->suspend_rx)) {
		debug_tcp("conn %d Rx suspended!\n", conn->id);
		return 0;
	}

O
Olaf Kirch 已提交
965 966 967 968
	skb_prepare_seq_read(skb, offset, skb->len, &seq);
	while (1) {
		unsigned int avail;
		const u8 *ptr;
969

O
Olaf Kirch 已提交
970
		avail = skb_seq_read(consumed, &ptr, &seq);
971 972 973
		if (avail == 0) {
			debug_tcp("no more data avail. Consumed %d\n",
				  consumed);
O
Olaf Kirch 已提交
974
			break;
975 976
		}
		BUG_ON(segment->copied >= segment->size);
O
Olaf Kirch 已提交
977 978

		debug_tcp("skb %p ptr=%p avail=%u\n", skb, ptr, avail);
979
		rc = iscsi_tcp_segment_recv(tcp_conn, segment, ptr, avail);
O
Olaf Kirch 已提交
980 981 982
		BUG_ON(rc == 0);
		consumed += rc;

983 984 985
		if (segment->total_copied >= segment->total_size) {
			debug_tcp("segment done\n");
			rc = segment->done(tcp_conn, segment);
O
Olaf Kirch 已提交
986 987 988
			if (rc != 0) {
				skb_abort_seq_read(&seq);
				goto error;
989
			}
990

O
Olaf Kirch 已提交
991
			/* The done() functions sets up the
992
			 * next segment. */
993 994
		}
	}
995
	skb_abort_seq_read(&seq);
O
Olaf Kirch 已提交
996 997
	conn->rxdata_octets += consumed;
	return consumed;
998

O
Olaf Kirch 已提交
999 1000 1001 1002
error:
	debug_tcp("Error receiving PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	return 0;
1003 1004 1005 1006 1007 1008
}

static void
iscsi_tcp_data_ready(struct sock *sk, int flag)
{
	struct iscsi_conn *conn = sk->sk_user_data;
O
Olaf Kirch 已提交
1009
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1010 1011 1012 1013
	read_descriptor_t rd_desc;

	read_lock(&sk->sk_callback_lock);

1014
	/*
O
Olaf Kirch 已提交
1015
	 * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
1016
	 * We set count to 1 because we want the network layer to
O
Olaf Kirch 已提交
1017
	 * hand us all the skbs that are available. iscsi_tcp_recv
1018 1019
	 * handled pdus that cross buffers or pdus that still need data.
	 */
1020
	rd_desc.arg.data = conn;
1021
	rd_desc.count = 1;
O
Olaf Kirch 已提交
1022
	tcp_read_sock(sk, &rd_desc, iscsi_tcp_recv);
1023 1024

	read_unlock(&sk->sk_callback_lock);
O
Olaf Kirch 已提交
1025 1026 1027

	/* If we had to (atomically) map a highmem page,
	 * unmap it now. */
1028
	iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
1029 1030 1031 1032 1033
}

static void
iscsi_tcp_state_change(struct sock *sk)
{
1034
	struct iscsi_tcp_conn *tcp_conn;
1035 1036 1037 1038 1039 1040 1041 1042 1043
	struct iscsi_conn *conn;
	struct iscsi_session *session;
	void (*old_state_change)(struct sock *);

	read_lock(&sk->sk_callback_lock);

	conn = (struct iscsi_conn*)sk->sk_user_data;
	session = conn->session;

M
Mike Christie 已提交
1044 1045 1046
	if ((sk->sk_state == TCP_CLOSE_WAIT ||
	     sk->sk_state == TCP_CLOSE) &&
	    !atomic_read(&sk->sk_rmem_alloc)) {
1047 1048 1049 1050
		debug_tcp("iscsi_tcp_state_change: TCP_CLOSE|TCP_CLOSE_WAIT\n");
		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	}

1051 1052
	tcp_conn = conn->dd_data;
	old_state_change = tcp_conn->old_state_change;
1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066

	read_unlock(&sk->sk_callback_lock);

	old_state_change(sk);
}

/**
 * iscsi_write_space - Called when more output buffer space is available
 * @sk: socket space is available for
 **/
static void
iscsi_write_space(struct sock *sk)
{
	struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
1067 1068 1069
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	tcp_conn->old_write_space(sk);
1070
	debug_tcp("iscsi_write_space: cid %d\n", conn->id);
1071
	scsi_queue_work(conn->session->host, &conn->xmitwork);
1072 1073 1074 1075 1076
}

static void
iscsi_conn_set_callbacks(struct iscsi_conn *conn)
{
1077 1078
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct sock *sk = tcp_conn->sock->sk;
1079 1080 1081 1082

	/* assign new callbacks */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = conn;
1083 1084 1085
	tcp_conn->old_data_ready = sk->sk_data_ready;
	tcp_conn->old_state_change = sk->sk_state_change;
	tcp_conn->old_write_space = sk->sk_write_space;
1086 1087 1088 1089 1090 1091 1092
	sk->sk_data_ready = iscsi_tcp_data_ready;
	sk->sk_state_change = iscsi_tcp_state_change;
	sk->sk_write_space = iscsi_write_space;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void
1093
iscsi_conn_restore_callbacks(struct iscsi_tcp_conn *tcp_conn)
1094
{
1095
	struct sock *sk = tcp_conn->sock->sk;
1096 1097 1098 1099

	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data    = NULL;
1100 1101 1102
	sk->sk_data_ready   = tcp_conn->old_data_ready;
	sk->sk_state_change = tcp_conn->old_state_change;
	sk->sk_write_space  = tcp_conn->old_write_space;
1103 1104 1105 1106 1107
	sk->sk_no_check	 = 0;
	write_unlock_bh(&sk->sk_callback_lock);
}

/**
1108 1109 1110 1111
 * iscsi_xmit - TCP transmit
 **/
static int
iscsi_xmit(struct iscsi_conn *conn)
1112
{
1113
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1114 1115 1116
	struct iscsi_segment *segment = &tcp_conn->out.segment;
	unsigned int consumed = 0;
	int rc = 0;
1117

1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133
	while (1) {
		rc = iscsi_tcp_xmit_segment(tcp_conn, segment);
		if (rc < 0)
			goto error;
		if (rc == 0)
			break;

		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			if (segment->done != NULL) {
				rc = segment->done(tcp_conn, segment);
				if (rc < 0)
					goto error;
			}
		}
1134 1135
	}

1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146
	debug_tcp("xmit %d bytes\n", consumed);

	conn->txdata_octets += consumed;
	return consumed;

error:
	/* Transmit error. We could initiate error recovery
	 * here. */
	debug_tcp("Error sending PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	return rc;
1147 1148 1149
}

/**
1150 1151
 * iscsi_tcp_xmit_qlen - return the number of bytes queued for xmit
 */
1152
static inline int
1153
iscsi_tcp_xmit_qlen(struct iscsi_conn *conn)
1154
{
1155 1156
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->out.segment;
1157

1158
	return segment->total_copied - segment->total_size;
1159 1160 1161
}

static inline int
1162
iscsi_tcp_flush(struct iscsi_conn *conn)
1163
{
1164 1165 1166 1167 1168
	int rc;

	while (iscsi_tcp_xmit_qlen(conn)) {
		rc = iscsi_xmit(conn);
		if (rc == 0)
1169
			return -EAGAIN;
1170 1171
		if (rc < 0)
			return rc;
1172
	}
1173

1174
	return 0;
1175 1176
}

1177 1178 1179 1180 1181 1182 1183
/*
 * This is called when we're done sending the header.
 * Simply copy the data_segment to the send segment, and return.
 */
static int
iscsi_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn,
			struct iscsi_segment *segment)
1184
{
1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195
	tcp_conn->out.segment = tcp_conn->out.data_segment;
	debug_tcp("Header done. Next segment size %u total_size %u\n",
		  tcp_conn->out.segment.size, tcp_conn->out.segment.total_size);
	return 0;
}

/*
 * Set up the outgoing segment for a PDU header.
 *
 * @hdr must have room for ISCSI_DIGEST_SIZE extra bytes behind @hdrlen
 * when header digests are enabled, because the digest is written
 * directly after the header.
 */
static void
iscsi_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr, size_t hdrlen)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	size_t wire_len = hdrlen;

	debug_tcp("%s(%p%s)\n", __func__, tcp_conn,
			conn->hdrdgst_en? ", digest enabled" : "");

	/* Start with an empty data segment; the caller fills it in
	 * through iscsi_tcp_send_data_prep() if there is a payload. */
	memset(&tcp_conn->out.data_segment, 0, sizeof(struct iscsi_segment));

	/* With header digests on, the CRC is computed over the header
	 * and appended to the same buffer, growing what goes on the wire. */
	if (conn->hdrdgst_en) {
		iscsi_tcp_dgst_header(&tcp_conn->tx_hash, hdr, hdrlen,
				      hdr + hdrlen);
		wire_len += ISCSI_DIGEST_SIZE;
	}

	/* Keep the header pointer around: it is needed later to decide
	 * whether a payload goes along with the header. */
	tcp_conn->out.hdr = hdr;

	iscsi_segment_init_linear(&tcp_conn->out.segment, hdr, wire_len,
				iscsi_tcp_send_hdr_done, NULL);
}

/*
 * Point the outgoing data segment at @len bytes of the scatterlist
 * @sg (with @count entries), starting @offset bytes in. The data digest
 * hash is handed to the segment only when data digests are enabled.
 *
 * Returns whatever iscsi_segment_seek_sg() returns.
 */
static int
iscsi_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg,
			 unsigned int count, unsigned int offset,
			 unsigned int len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *tx_hash;
	unsigned int hdr_spec_len;

	debug_tcp("%s(%p, offset=%d, datalen=%d%s)\n", __func__,
			tcp_conn, offset, len,
			conn->datadgst_en? ", digest enabled" : "");

	/* The length must agree (after padding) with the dlength
	 * announced in the header that was prepared before. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	tx_hash = conn->datadgst_en ? &tcp_conn->tx_hash : NULL;

	return iscsi_segment_seek_sg(&tcp_conn->out.data_segment,
				   sg, count, offset, len,
				   NULL, tx_hash);
}

/*
 * Prepare the outgoing data segment for a linear (non-scatterlist)
 * buffer of @len bytes at @data. The data digest hash is handed to the
 * segment only when data digests are enabled.
 */
static void
iscsi_tcp_send_linear_data_prepare(struct iscsi_conn *conn, void *data,
				   size_t len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

	/* len is a size_t, so it must be printed with %zu, not %d */
	debug_tcp("%s(%p, datalen=%zu%s)\n", __func__, tcp_conn, len,
		  conn->datadgst_en? ", digest enabled" : "");

	/* Make sure the datalen matches what the caller
	   said he would send. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
		tx_hash = &tcp_conn->tx_hash;

	iscsi_segment_init_linear(&tcp_conn->out.data_segment,
				data, len, NULL, tx_hash);
}

/**
 * iscsi_solicit_data_cont - initialize next Data-Out
 * @conn: iscsi connection
1280
 * @task: scsi command task
1281 1282 1283 1284 1285 1286 1287 1288 1289
 * @r2t: R2T info
 * @left: bytes left to transfer
 *
 * Notes:
 *	Initialize next Data-Out within this R2T sequence and continue
 *	to process next Scatter-Gather element(if any) of this SCSI command.
 *
 *	Called under connection lock.
 **/
1290
static int
1291
iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_task *task,
1292
			struct iscsi_r2t_info *r2t)
1293 1294
{
	struct iscsi_data *hdr;
1295 1296 1297 1298 1299 1300
	int new_offset, left;

	BUG_ON(r2t->data_length - r2t->sent < 0);
	left = r2t->data_length - r2t->sent;
	if (left == 0)
		return 0;
1301

1302
	hdr = &r2t->dtask.hdr;
1303 1304 1305 1306 1307
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
1308 1309
	memcpy(hdr->lun, task->hdr->lun, sizeof(hdr->lun));
	hdr->itt = task->hdr->itt;
1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321
	hdr->exp_statsn = r2t->exp_statsn;
	new_offset = r2t->data_offset + r2t->sent;
	hdr->offset = cpu_to_be32(new_offset);
	if (left > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
	} else {
		hton24(hdr->dlength, left);
		r2t->data_count = left;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}

1322 1323
	conn->dataout_pdus_cnt++;
	return 1;
1324 1325 1326
}

/**
1327
 * iscsi_tcp_task - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
1328
 * @conn: iscsi connection
1329
 * @task: scsi command task
1330 1331
 * @sc: scsi command
 **/
1332
static int
1333
iscsi_tcp_task_init(struct iscsi_task *task)
1334
{
1335 1336 1337
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct iscsi_conn *conn = task->conn;
	struct scsi_cmnd *sc = task->sc;
1338
	int err;
1339

1340 1341
	if (!sc) {
		/*
1342
		 * mgmt tasks do not have a scatterlist since they come
1343 1344
		 * in from the iscsi interface.
		 */
1345 1346
		debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id,
			   task->itt);
1347 1348

		/* Prepare PDU, optionally w/ immediate data */
1349
		iscsi_tcp_send_hdr_prep(conn, task->hdr, sizeof(*task->hdr));
1350 1351

		/* If we have immediate data, attach a payload */
1352 1353 1354
		if (task->data_count)
			iscsi_tcp_send_linear_data_prepare(conn, task->data,
							   task->data_count);
1355 1356 1357
		return 0;
	}

1358 1359 1360
	BUG_ON(__kfifo_len(tcp_task->r2tqueue));
	tcp_task->sent = 0;
	tcp_task->exp_datasn = 0;
1361 1362

	/* Prepare PDU, optionally w/ immediate data */
1363 1364 1365 1366
	debug_scsi("task deq [cid %d itt 0x%x imm %d unsol %d]\n",
		    conn->id, task->itt, task->imm_count,
		    task->unsol_count);
	iscsi_tcp_send_hdr_prep(conn, task->hdr, task->hdr_len);
1367

1368
	if (!task->imm_count)
1369 1370 1371
		return 0;

	/* If we have immediate data, attach a payload */
1372 1373
	err = iscsi_tcp_send_data_prep(conn, scsi_out(sc)->table.sgl,
				       scsi_out(sc)->table.nents,
1374
				       0, task->imm_count);
1375 1376
	if (err)
		return err;
1377 1378
	tcp_task->sent += task->imm_count;
	task->imm_count = 0;
1379
	return 0;
1380 1381
}

1382
/*
1383 1384
 * iscsi_tcp_task_xmit - xmit normal PDU task
 * @task: iscsi command task
1385 1386 1387 1388 1389
 *
 * We're expected to return 0 when everything was transmitted succesfully,
 * -EAGAIN if there's still data in the queue, or != 0 for any other kind
 * of error.
 */
1390
static int
1391
iscsi_tcp_task_xmit(struct iscsi_task *task)
1392
{
1393 1394 1395
	struct iscsi_conn *conn = task->conn;
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct scsi_cmnd *sc = task->sc;
1396
	struct scsi_data_buffer *sdb;
1397
	int rc = 0;
1398

1399 1400 1401 1402
flush:
	/* Flush any pending data first. */
	rc = iscsi_tcp_flush(conn);
	if (rc < 0)
1403 1404
		return rc;

1405 1406
	/* mgmt command */
	if (!sc) {
1407 1408
		if (task->hdr->itt == RESERVED_ITT)
			iscsi_put_task(task);
1409 1410 1411
		return 0;
	}

1412 1413 1414
	/* Are we done already? */
	if (sc->sc_data_direction != DMA_TO_DEVICE)
		return 0;
1415

1416
	sdb = scsi_out(sc);
1417 1418
	if (task->unsol_count != 0) {
		struct iscsi_data *hdr = &tcp_task->unsol_dtask.hdr;
1419

1420 1421
		/* Prepare a header for the unsolicited PDU.
		 * The amount of data we want to send will be
1422
		 * in task->data_count.
1423 1424
		 * FIXME: return the data count instead.
		 */
1425
		iscsi_prep_unsolicit_data_pdu(task, hdr);
1426

1427
		debug_tcp("unsol dout [itt 0x%x doff %d dlen %d]\n",
1428
				task->itt, tcp_task->sent, task->data_count);
1429

1430
		iscsi_tcp_send_hdr_prep(conn, hdr, sizeof(*hdr));
1431
		rc = iscsi_tcp_send_data_prep(conn, sdb->table.sgl,
1432 1433
					      sdb->table.nents, tcp_task->sent,
					      task->data_count);
1434
		if (rc)
1435
			goto fail;
1436 1437
		tcp_task->sent += task->data_count;
		task->unsol_count -= task->data_count;
1438 1439 1440 1441
		goto flush;
	} else {
		struct iscsi_session *session = conn->session;
		struct iscsi_r2t_info *r2t;
1442

1443
		/* All unsolicited PDUs sent. Check for solicited PDUs.
1444
		 */
1445
		spin_lock_bh(&session->lock);
1446
		r2t = tcp_task->r2t;
1447 1448
		if (r2t != NULL) {
			/* Continue with this R2T? */
1449
			if (!iscsi_solicit_data_cont(conn, task, r2t)) {
1450 1451
				debug_scsi("  done with r2t %p\n", r2t);

1452
				__kfifo_put(tcp_task->r2tpool.queue,
1453
					    (void*)&r2t, sizeof(void*));
1454
				tcp_task->r2t = r2t = NULL;
1455
			}
1456 1457
		}

1458
		if (r2t == NULL) {
1459
			__kfifo_get(tcp_task->r2tqueue, (void*)&tcp_task->r2t,
1460
				    sizeof(void*));
1461
			r2t = tcp_task->r2t;
1462
		}
1463
		spin_unlock_bh(&session->lock);
1464

1465 1466 1467 1468
		/* Waiting for more R2Ts to arrive. */
		if (r2t == NULL) {
			debug_tcp("no R2Ts yet\n");
			return 0;
1469 1470
		}

1471
		debug_scsi("sol dout %p [dsn %d itt 0x%x doff %d dlen %d]\n",
1472
			r2t, r2t->solicit_datasn - 1, task->itt,
1473
			r2t->data_offset + r2t->sent, r2t->data_count);
1474

1475 1476
		iscsi_tcp_send_hdr_prep(conn, &r2t->dtask.hdr,
					sizeof(struct iscsi_hdr));
1477

1478 1479
		rc = iscsi_tcp_send_data_prep(conn, sdb->table.sgl,
					      sdb->table.nents,
1480 1481
					      r2t->data_offset + r2t->sent,
					      r2t->data_count);
1482
		if (rc)
1483
			goto fail;
1484
		tcp_task->sent += r2t->data_count;
1485 1486
		r2t->sent += r2t->data_count;
		goto flush;
1487 1488
	}
	return 0;
1489 1490 1491
fail:
	iscsi_conn_failure(conn, rc);
	return -EIO;
1492 1493
}

1494 1495
/*
 * Create a connection and allocate its two crc32c hash transforms
 * (tx and rx). Returns the class connection, or NULL on failure.
 */
static struct iscsi_cls_conn *
iscsi_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
{
	struct iscsi_cls_conn *cls_conn;
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_conn *conn;
	struct hash_desc *tx, *rx;

	cls_conn = iscsi_conn_setup(cls_session, sizeof(*tcp_conn), conn_idx);
	if (!cls_conn)
		return NULL;

	conn = cls_conn->dd_data;
	/*
	 * due to strange issues with iser these are not set
	 * in iscsi_conn_setup
	 */
	conn->max_recv_dlength = ISCSI_DEF_MAX_RECV_SEG_LEN;

	tcp_conn = conn->dd_data;
	tcp_conn->iscsi_conn = conn;
	tx = &tcp_conn->tx_hash;
	rx = &tcp_conn->rx_hash;

	tx->tfm = crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC);
	tx->flags = 0;
	if (!IS_ERR(tx->tfm)) {
		rx->tfm = crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC);
		rx->flags = 0;
		if (!IS_ERR(rx->tfm))
			return cls_conn;

		/* rx allocation failed: give the tx transform back */
		crypto_free_hash(tx->tfm);
	}

	iscsi_conn_printk(KERN_ERR, conn,
			  "Could not create connection due to crc32c "
			  "loading error. Make sure the crc32c "
			  "module is built as a module or into the "
			  "kernel\n");
	iscsi_conn_teardown(cls_conn);
	return NULL;
}

1540 1541 1542
static void
iscsi_tcp_release_conn(struct iscsi_conn *conn)
{
1543
	struct iscsi_session *session = conn->session;
1544
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1545
	struct socket *sock = tcp_conn->sock;
1546

1547
	if (!sock)
1548 1549
		return;

1550
	sock_hold(sock->sk);
1551
	iscsi_conn_restore_callbacks(tcp_conn);
1552
	sock_put(sock->sk);
1553

1554
	spin_lock_bh(&session->lock);
1555
	tcp_conn->sock = NULL;
1556 1557
	spin_unlock_bh(&session->lock);
	sockfd_put(sock);
1558 1559
}

1560
static void
1561
iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
1562
{
1563 1564
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1565

1566
	iscsi_tcp_release_conn(conn);
1567

P
Pete Wyckoff 已提交
1568 1569 1570 1571
	if (tcp_conn->tx_hash.tfm)
		crypto_free_hash(tcp_conn->tx_hash.tfm);
	if (tcp_conn->rx_hash.tfm)
		crypto_free_hash(tcp_conn->rx_hash.tfm);
1572

1573
	iscsi_conn_teardown(cls_conn);
1574
}
1575

1576 1577 1578 1579
static void
iscsi_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	/* userspace may have goofed up and not bound us */
	if (!tcp_conn->sock)
		return;
	/*
	 * Make sure our recv side is stopped.
	 * Older tools called conn stop before ep_disconnect
	 * so IO could still be coming in.
	 */
	write_lock_bh(&tcp_conn->sock->sk->sk_callback_lock);
	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
	write_unlock_bh(&tcp_conn->sock->sk->sk_callback_lock);
1593 1594 1595 1596 1597

	iscsi_conn_stop(cls_conn, flag);
	iscsi_tcp_release_conn(conn);
}

1598 1599 1600 1601 1602 1603 1604 1605 1606 1607
static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
			      char *buf, int *port,
			      int (*getname)(struct socket *, struct sockaddr *,
					int *addrlen))
{
	struct sockaddr_storage *addr;
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	int rc = 0, len;

1608
	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637
	if (!addr)
		return -ENOMEM;

	if (getname(sock, (struct sockaddr *) addr, &len)) {
		rc = -ENODEV;
		goto free_addr;
	}

	switch (addr->ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
		*port = be16_to_cpu(sin->sin_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, NIP6_FMT, NIP6(sin6->sin6_addr));
		*port = be16_to_cpu(sin6->sin6_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	}
free_addr:
	kfree(addr);
	return rc;
}

1638 1639
/*
 * Bind the connection to the socket whose file descriptor userspace
 * passed in @transport_eph, record peer and local addresses, and
 * install the iSCSI socket callbacks.
 *
 * Returns 0 on success, -EEXIST if the descriptor lookup fails, or the
 * error of the step that failed (the socket reference is dropped).
 */
static int
iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
		    struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
		    int is_leading)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
	struct iscsi_host *ihost = shost_priv(shost);
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct socket *sock;
	struct sock *sk;
	int err;

	/* lookup for existing socket */
	sock = sockfd_lookup((int)transport_eph, &err);
	if (!sock) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "sockfd_lookup failed %d\n", err);
		return -EEXIST;
	}

	/*
	 * copy these values now because if we drop the session
	 * userspace may still want to query the values since we will
	 * be using them for the reconnect
	 */
	err = iscsi_tcp_get_addr(conn, sock, conn->portal_address,
				 &conn->portal_port, kernel_getpeername);
	if (!err)
		err = iscsi_tcp_get_addr(conn, sock, ihost->local_address,
					&ihost->local_port, kernel_getsockname);
	if (!err)
		err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
	if (err) {
		sockfd_put(sock);
		return err;
	}

	/* bind iSCSI connection and socket */
	tcp_conn->sock = sock;

	/* setup Socket parameters */
	sk = sock->sk;
	sk->sk_allocation = GFP_ATOMIC;
	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
	sk->sk_reuse = 1;

	iscsi_conn_set_callbacks(conn);
	tcp_conn->sendpage = sock->ops->sendpage;

	/* set receive state machine into initial state */
	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

static int
iscsi_r2tpool_alloc(struct iscsi_session *session)
{
	int i;
	int cmd_i;

	/*
1706
	 * initialize per-task: R2T pool and xmit queue
1707 1708
	 */
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
1709 1710
	        struct iscsi_task *task = session->cmds[cmd_i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;
1711 1712 1713 1714 1715 1716 1717 1718

		/*
		 * pre-allocated x4 as much r2ts to handle race when
		 * target acks DataOut faster than we data_xmit() queues
		 * could replenish r2tqueue.
		 */

		/* R2T pool */
1719
		if (iscsi_pool_init(&tcp_task->r2tpool, session->max_r2t * 4, NULL,
1720
				    sizeof(struct iscsi_r2t_info))) {
1721 1722 1723 1724
			goto r2t_alloc_fail;
		}

		/* R2T xmit queue */
1725
		tcp_task->r2tqueue = kfifo_alloc(
1726
		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL);
1727 1728
		if (tcp_task->r2tqueue == ERR_PTR(-ENOMEM)) {
			iscsi_pool_free(&tcp_task->r2tpool);
1729 1730 1731 1732 1733 1734 1735 1736
			goto r2t_alloc_fail;
		}
	}

	return 0;

r2t_alloc_fail:
	for (i = 0; i < cmd_i; i++) {
1737 1738
		struct iscsi_task *task = session->cmds[i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;
1739

1740 1741
		kfifo_free(tcp_task->r2tqueue);
		iscsi_pool_free(&tcp_task->r2tpool);
1742 1743 1744 1745 1746 1747 1748 1749 1750 1751
	}
	return -ENOMEM;
}

/*
 * Release the R2T xmit queue and R2T pool of every task in @session.
 */
static void
iscsi_r2tpool_free(struct iscsi_session *session)
{
	struct iscsi_tcp_task *tcp_task;
	int idx;

	for (idx = 0; idx < session->cmds_max; idx++) {
		tcp_task = session->cmds[idx]->dd_data;

		kfifo_free(tcp_task->r2tqueue);
		iscsi_pool_free(&tcp_task->r2tpool);
	}
}

static int
1761
iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
1762
		     char *buf, int buflen)
1763
{
1764
	struct iscsi_conn *conn = cls_conn->dd_data;
1765
	struct iscsi_session *session = conn->session;
1766
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1767
	int value;
1768 1769 1770

	switch(param) {
	case ISCSI_PARAM_HDRDGST_EN:
1771
		iscsi_set_param(cls_conn, param, buf, buflen);
1772 1773
		break;
	case ISCSI_PARAM_DATADGST_EN:
1774
		iscsi_set_param(cls_conn, param, buf, buflen);
1775 1776
		tcp_conn->sendpage = conn->datadgst_en ?
			sock_no_sendpage : tcp_conn->sock->ops->sendpage;
1777 1778
		break;
	case ISCSI_PARAM_MAX_R2T:
1779
		sscanf(buf, "%d", &value);
1780 1781 1782
		if (value <= 0 || !is_power_of_2(value))
			return -EINVAL;
		if (session->max_r2t == value)
1783 1784
			break;
		iscsi_r2tpool_free(session);
1785
		iscsi_set_param(cls_conn, param, buf, buflen);
1786 1787 1788 1789
		if (iscsi_r2tpool_alloc(session))
			return -ENOMEM;
		break;
	default:
1790
		return iscsi_set_param(cls_conn, param, buf, buflen);
1791 1792 1793 1794 1795 1796
	}

	return 0;
}

static int
1797 1798
iscsi_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
			 enum iscsi_param param, char *buf)
1799
{
1800
	struct iscsi_conn *conn = cls_conn->dd_data;
1801
	int len;
1802 1803

	switch(param) {
1804
	case ISCSI_PARAM_CONN_PORT:
1805 1806 1807
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%hu\n", conn->portal_port);
		spin_unlock_bh(&conn->session->lock);
1808
		break;
1809
	case ISCSI_PARAM_CONN_ADDRESS:
1810 1811 1812
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%s\n", conn->portal_address);
		spin_unlock_bh(&conn->session->lock);
1813 1814
		break;
	default:
1815
		return iscsi_conn_get_param(cls_conn, param, buf);
1816 1817 1818 1819 1820
	}

	return len;
}

1821
static void
1822
iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
1823
{
1824
	struct iscsi_conn *conn = cls_conn->dd_data;
1825
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837

	stats->txdata_octets = conn->txdata_octets;
	stats->rxdata_octets = conn->rxdata_octets;
	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
	stats->dataout_pdus = conn->dataout_pdus_cnt;
	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
	stats->datain_pdus = conn->datain_pdus_cnt;
	stats->r2t_pdus = conn->r2t_pdus_cnt;
	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
	stats->custom_length = 3;
	strcpy(stats->custom[0].desc, "tx_sendpage_failures");
1838
	stats->custom[0].value = tcp_conn->sendpage_failures_cnt;
1839
	strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
1840
	stats->custom[1].value = tcp_conn->discontiguous_hdr_cnt;
1841 1842 1843 1844
	strcpy(stats->custom[2].desc, "eh_abort_cnt");
	stats->custom[2].value = conn->eh_abort_cnt;
}

1845
static struct iscsi_cls_session *
1846
iscsi_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
1847 1848
			 uint16_t qdepth, uint32_t initial_cmdsn,
			 uint32_t *hostno)
1849
{
1850 1851
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
1852
	struct Scsi_Host *shost;
1853
	int cmd_i;
1854

1855 1856
	if (ep) {
		printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);
1857 1858 1859
		return NULL;
	}

1860
	shost = iscsi_host_alloc(&iscsi_sht, 0, qdepth);
1861
	if (!shost)
1862
		return NULL;
1863 1864 1865 1866
	shost->transportt = iscsi_tcp_scsi_transport;
	shost->max_lun = iscsi_max_lun;
	shost->max_id = 0;
	shost->max_channel = 0;
1867
	shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;
1868

1869
	if (iscsi_host_add(shost, NULL))
1870 1871 1872 1873
		goto free_host;
	*hostno = shost->host_no;

	cls_session = iscsi_session_setup(&iscsi_tcp_transport, shost, cmds_max,
1874
					  sizeof(struct iscsi_tcp_task),
1875
					  initial_cmdsn, 0);
1876 1877 1878
	if (!cls_session)
		goto remove_host;
	session = cls_session->dd_data;
1879

1880
	shost->can_queue = session->scsi_cmds_max;
1881
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
1882 1883
		struct iscsi_task *task = session->cmds[cmd_i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;
1884

1885 1886
		task->hdr = &tcp_task->hdr.cmd_hdr;
		task->hdr_max = sizeof(tcp_task->hdr) - ISCSI_DIGEST_SIZE;
1887 1888
	}

1889 1890
	if (iscsi_r2tpool_alloc(session))
		goto remove_session;
1891 1892
	return cls_session;

1893
remove_session:
1894
	iscsi_session_teardown(cls_session);
1895
remove_host:
1896
	iscsi_host_remove(shost);
1897
free_host:
1898
	iscsi_host_free(shost);
1899 1900 1901 1902 1903
	return NULL;
}

/*
 * Destroy a session: free the per-task R2T resources, then remove and
 * free the SCSI host that belongs to the session.
 *
 * NOTE(review): the error path of iscsi_tcp_session_create() calls
 * iscsi_session_teardown() before iscsi_host_remove(); this function
 * does not. Presumably the host removal covers it - confirm.
 */
static void iscsi_tcp_session_destroy(struct iscsi_cls_session *cls_session)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
	struct iscsi_session *session = cls_session->dd_data;

	iscsi_r2tpool_free(session);

	iscsi_host_remove(shost);
	iscsi_host_free(shost);
}

1912 1913
static int iscsi_tcp_slave_configure(struct scsi_device *sdev)
{
1914
	blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_ANY);
1915 1916 1917 1918
	blk_queue_dma_alignment(sdev->request_queue, 0);
	return 0;
}

1919
static struct scsi_host_template iscsi_sht = {
1920
	.module			= THIS_MODULE,
1921
	.name			= "iSCSI Initiator over TCP/IP",
1922 1923
	.queuecommand           = iscsi_queuecommand,
	.change_queue_depth	= iscsi_change_queue_depth,
1924
	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
1925
	.sg_tablesize		= 4096,
1926
	.max_sectors		= 0xFFFF,
1927 1928
	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
	.eh_abort_handler       = iscsi_eh_abort,
1929
	.eh_device_reset_handler= iscsi_eh_device_reset,
1930 1931
	.eh_host_reset_handler	= iscsi_eh_host_reset,
	.use_clustering         = DISABLE_CLUSTERING,
1932
	.slave_configure        = iscsi_tcp_slave_configure,
1933 1934 1935 1936
	.proc_name		= "iscsi_tcp",
	.this_id		= -1,
};

1937 1938 1939 1940 1941
static struct iscsi_transport iscsi_tcp_transport = {
	.owner			= THIS_MODULE,
	.name			= "tcp",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
				  | CAP_DATADGST,
1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954
	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
				  ISCSI_MAX_XMIT_DLENGTH |
				  ISCSI_HDRDGST_EN |
				  ISCSI_DATADGST_EN |
				  ISCSI_INITIAL_R2T_EN |
				  ISCSI_MAX_R2T |
				  ISCSI_IMM_DATA_EN |
				  ISCSI_FIRST_BURST |
				  ISCSI_MAX_BURST |
				  ISCSI_PDU_INORDER_EN |
				  ISCSI_DATASEQ_INORDER_EN |
				  ISCSI_ERL |
				  ISCSI_CONN_PORT |
1955
				  ISCSI_CONN_ADDRESS |
1956 1957 1958
				  ISCSI_EXP_STATSN |
				  ISCSI_PERSISTENT_PORT |
				  ISCSI_PERSISTENT_ADDRESS |
1959 1960
				  ISCSI_TARGET_NAME | ISCSI_TPGT |
				  ISCSI_USERNAME | ISCSI_PASSWORD |
1961
				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
1962 1963
				  ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
				  ISCSI_LU_RESET_TMO |
1964 1965
				  ISCSI_PING_TMO | ISCSI_RECV_TMO |
				  ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
1966
	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
1967 1968
				  ISCSI_HOST_INITIATOR_NAME |
				  ISCSI_HOST_NETDEV_NAME,
1969 1970 1971 1972 1973 1974 1975
	/* session management */
	.create_session		= iscsi_tcp_session_create,
	.destroy_session	= iscsi_tcp_session_destroy,
	/* connection management */
	.create_conn		= iscsi_tcp_conn_create,
	.bind_conn		= iscsi_tcp_conn_bind,
	.destroy_conn		= iscsi_tcp_conn_destroy,
1976
	.set_param		= iscsi_conn_set_param,
1977
	.get_conn_param		= iscsi_tcp_conn_get_param,
1978
	.get_session_param	= iscsi_session_get_param,
1979
	.start_conn		= iscsi_conn_start,
1980
	.stop_conn		= iscsi_tcp_conn_stop,
1981
	/* iscsi host params */
1982
	.get_host_param		= iscsi_host_get_param,
1983
	.set_host_param		= iscsi_host_set_param,
1984
	/* IO */
1985 1986
	.send_pdu		= iscsi_conn_send_pdu,
	.get_stats		= iscsi_conn_get_stats,
1987 1988 1989
	.init_task		= iscsi_tcp_task_init,
	.xmit_task		= iscsi_tcp_task_xmit,
	.cleanup_task		= iscsi_tcp_cleanup_task,
1990
	/* recovery */
M
Mike Christie 已提交
1991
	.session_recovery_timedout = iscsi_session_recovery_timedout,
1992 1993 1994 1995 1996 1997
};

/**
 * iscsi_tcp_init - module initialisation
 *
 * Validates iscsi_max_lun and registers iscsi_tcp_transport, storing the
 * result of the registration in iscsi_tcp_scsi_transport.
 *
 * Returns 0 on success, -EINVAL if iscsi_max_lun is below 1, or -ENODEV if
 * iscsi_register_transport() returned NULL.
 */
static int __init
iscsi_tcp_init(void)
{
	/* Refuse to load with a max_lun that allows no LUN at all. */
	if (iscsi_max_lun < 1) {
		printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
		       iscsi_max_lun);
		return -EINVAL;
	}

	iscsi_tcp_scsi_transport = iscsi_register_transport(
							&iscsi_tcp_transport);
	if (!iscsi_tcp_scsi_transport)
		return -ENODEV;

	return 0;
}

/**
 * iscsi_tcp_exit - module teardown
 *
 * Unregisters iscsi_tcp_transport, the transport that iscsi_tcp_init()
 * registered.
 */
static void __exit iscsi_tcp_exit(void)
{
	iscsi_unregister_transport(&iscsi_tcp_transport);
}

/* Register the module's load and unload entry points. */
module_init(iscsi_tcp_init);
module_exit(iscsi_tcp_exit);