/*
 * iSCSI Initiator over TCP/IP Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
 * Copyright (C) 2005 - 2006 Mike Christie
 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
 * maintained by open-iscsi@googlegroups.com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * See the file COPYING included with this distribution for more details.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 */

#include <linux/types.h>
#include <linux/list.h>
#include <linux/inet.h>
M
Mike Christie 已提交
32
#include <linux/file.h>
33 34 35 36 37 38 39
#include <linux/blkdev.h>
#include <linux/crypto.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
#include <net/tcp.h>
#include <scsi/scsi_cmnd.h>
40
#include <scsi/scsi_device.h>
41 42 43 44 45 46 47 48 49 50
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include "iscsi_tcp.h"

MODULE_AUTHOR("Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");
MODULE_DESCRIPTION("iSCSI/TCP data-path");
MODULE_LICENSE("GPL");

/* Build-time switches: TCP tracing is off, assertions are on. */
#undef DEBUG_TCP
#define DEBUG_ASSERT

/* debug_tcp() expands to a printk only when DEBUG_TCP is defined. */
#ifndef DEBUG_TCP
#define debug_tcp(fmt...)
#else
#define debug_tcp(fmt...) printk(KERN_INFO "tcp: " fmt)
#endif

/* Without DEBUG_ASSERT, BUG_ON() is replaced by an empty macro. */
#ifndef DEBUG_ASSERT
#undef BUG_ON
#define BUG_ON(expr)
#endif

/* File-scope transport objects; they are only declared at this point. */
static struct scsi_transport_template *iscsi_tcp_scsi_transport;
static struct scsi_host_template iscsi_sht;
static struct iscsi_transport iscsi_tcp_transport;

/* Exposed as the read-only module parameter "max_lun". */
static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

/* Forward declaration: referenced by iscsi_tcp_hdr_recv_prep(). */
static int iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
				   struct iscsi_segment *segment);
76

/*
 * Scatterlist handling: inside the iscsi_segment, we
 * remember an index into the scatterlist, and set data/size
 * to the current scatterlist entry. For highmem pages, we
 * kmap as needed.
 *
 * Note that the page is unmapped when we return from
 * TCP's data_ready handler, so we may end up mapping and
 * unmapping the same page repeatedly. The whole reason
 * for this is that we shouldn't keep the page mapped
 * outside the softirq.
 */

/**
91 92 93
 * iscsi_tcp_segment_init_sg - init indicated scatterlist entry
 * @segment: the buffer object
 * @sg: scatterlist
O
Olaf Kirch 已提交
94 95
 * @offset: byte offset into that sg entry
 *
96
 * This function sets up the segment so that subsequent
O
Olaf Kirch 已提交
97 98 99 100
 * data is copied to the indicated sg entry, at the given
 * offset.
 */
static inline void
101 102
iscsi_tcp_segment_init_sg(struct iscsi_segment *segment,
			  struct scatterlist *sg, unsigned int offset)
O
Olaf Kirch 已提交
103
{
104 105 106 107 108
	segment->sg = sg;
	segment->sg_offset = offset;
	segment->size = min(sg->length - offset,
			    segment->total_size - segment->total_copied);
	segment->data = NULL;
O
Olaf Kirch 已提交
109 110 111
}

/**
112 113 114
 * iscsi_tcp_segment_map - map the current S/G page
 * @segment: iscsi_segment
 * @recv: 1 if called from recv path
O
Olaf Kirch 已提交
115 116 117 118 119 120
 *
 * We only need to possibly kmap data if scatter lists are being used,
 * because the iscsi passthrough and internal IO paths will never use high
 * mem pages.
 */
static inline void
121
iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv)
O
Olaf Kirch 已提交
122 123 124
{
	struct scatterlist *sg;

125
	if (segment->data != NULL || !segment->sg)
O
Olaf Kirch 已提交
126 127
		return;

128 129
	sg = segment->sg;
	BUG_ON(segment->sg_mapped);
O
Olaf Kirch 已提交
130
	BUG_ON(sg->length == 0);
131 132 133 134 135 136 137 138 139 140 141 142 143 144

	/*
	 * If the page count is greater than one it is ok to send
	 * to the network layer's zero copy send path. If not we
	 * have to go the slow sendmsg path. We always map for the
	 * recv path.
	 */
	if (page_count(sg_page(sg)) >= 1 && !recv)
		return;

	debug_tcp("iscsi_tcp_segment_map %s %p\n", recv ? "recv" : "xmit",
		  segment);
	segment->sg_mapped = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
	segment->data = segment->sg_mapped + sg->offset + segment->sg_offset;
O
Olaf Kirch 已提交
145 146 147
}

static inline void
148
iscsi_tcp_segment_unmap(struct iscsi_segment *segment)
O
Olaf Kirch 已提交
149
{
150 151 152 153 154 155 156
	debug_tcp("iscsi_tcp_segment_unmap %p\n", segment);

	if (segment->sg_mapped) {
		debug_tcp("iscsi_tcp_segment_unmap valid\n");
		kunmap_atomic(segment->sg_mapped, KM_SOFTIRQ0);
		segment->sg_mapped = NULL;
		segment->data = NULL;
O
Olaf Kirch 已提交
157 158 159 160 161 162 163
	}
}

/*
 * Splice the digest buffer into the buffer
 */
static inline void
164
iscsi_tcp_segment_splice_digest(struct iscsi_segment *segment, void *digest)
O
Olaf Kirch 已提交
165
{
166 167 168 169 170 171 172
	segment->data = digest;
	segment->digest_len = ISCSI_DIGEST_SIZE;
	segment->total_size += ISCSI_DIGEST_SIZE;
	segment->size = ISCSI_DIGEST_SIZE;
	segment->copied = 0;
	segment->sg = NULL;
	segment->hash = NULL;
O
Olaf Kirch 已提交
173 174 175
}

/**
176 177 178 179
 * iscsi_tcp_segment_done - check whether the segment is complete
 * @segment: iscsi segment to check
 * @recv: set to one of this is called from the recv path
 * @copied: number of bytes copied
O
Olaf Kirch 已提交
180
 *
181
 * Check if we're done receiving this segment. If the receive
O
Olaf Kirch 已提交
182 183 184 185 186 187 188 189
 * buffer is full but we expect more data, move on to the
 * next entry in the scatterlist.
 *
 * If the amount of data we received isn't a multiple of 4,
 * we will transparently receive the pad bytes, too.
 *
 * This function must be re-entrant.
 */
190
static inline int
191
iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv, unsigned copied)
192
{
O
Olaf Kirch 已提交
193
	static unsigned char padbuf[ISCSI_PAD_LEN];
194
	struct scatterlist sg;
195
	unsigned int pad;
O
Olaf Kirch 已提交
196

197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
	debug_tcp("copied %u %u size %u %s\n", segment->copied, copied,
		  segment->size, recv ? "recv" : "xmit");
	if (segment->hash && copied) {
		/*
		 * If a segment is kmapd we must unmap it before sending
		 * to the crypto layer since that will try to kmap it again.
		 */
		iscsi_tcp_segment_unmap(segment);

		if (!segment->data) {
			sg_init_table(&sg, 1);
			sg_set_page(&sg, sg_page(segment->sg), copied,
				    segment->copied + segment->sg_offset +
							segment->sg->offset);
		} else
			sg_init_one(&sg, segment->data + segment->copied,
				    copied);
		crypto_hash_update(segment->hash, &sg, copied);
	}

	segment->copied += copied;
	if (segment->copied < segment->size) {
		iscsi_tcp_segment_map(segment, recv);
O
Olaf Kirch 已提交
220 221
		return 0;
	}
222

223 224 225
	segment->total_copied += segment->copied;
	segment->copied = 0;
	segment->size = 0;
226

O
Olaf Kirch 已提交
227
	/* Unmap the current scatterlist page, if there is one. */
228
	iscsi_tcp_segment_unmap(segment);
O
Olaf Kirch 已提交
229 230

	/* Do we have more scatterlist entries? */
231 232 233
	debug_tcp("total copied %u total size %u\n", segment->total_copied,
		   segment->total_size);
	if (segment->total_copied < segment->total_size) {
O
Olaf Kirch 已提交
234
		/* Proceed to the next entry in the scatterlist. */
235 236 237 238
		iscsi_tcp_segment_init_sg(segment, sg_next(segment->sg),
					  0);
		iscsi_tcp_segment_map(segment, recv);
		BUG_ON(segment->size == 0);
O
Olaf Kirch 已提交
239 240 241 242
		return 0;
	}

	/* Do we need to handle padding? */
243
	pad = iscsi_padding(segment->total_copied);
244
	if (pad != 0) {
O
Olaf Kirch 已提交
245
		debug_tcp("consume %d pad bytes\n", pad);
246 247 248
		segment->total_size += pad;
		segment->size = pad;
		segment->data = padbuf;
O
Olaf Kirch 已提交
249 250 251 252
		return 0;
	}

	/*
253
	 * Set us up for transferring the data digest. hdr digest
O
Olaf Kirch 已提交
254 255
	 * is completely handled in hdr done function.
	 */
256 257 258 259 260
	if (segment->hash) {
		crypto_hash_final(segment->hash, segment->digest);
		iscsi_tcp_segment_splice_digest(segment,
				 recv ? segment->recv_digest : segment->digest);
		return 0;
O
Olaf Kirch 已提交
261
	}
262

O
Olaf Kirch 已提交
263 264
	return 1;
}
265

O
Olaf Kirch 已提交
266
/**
267
 * iscsi_tcp_xmit_segment - transmit segment
O
Olaf Kirch 已提交
268
 * @tcp_conn: the iSCSI TCP connection
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
 * @segment: the buffer to transmnit
 *
 * This function transmits as much of the buffer as
 * the network layer will accept, and returns the number of
 * bytes transmitted.
 *
 * If CRC hashing is enabled, the function will compute the
 * hash as it goes. When the entire segment has been transmitted,
 * it will retrieve the hash value and send it as well.
 */
static int
iscsi_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment)
{
	struct socket *sk = tcp_conn->sock;
	unsigned int copied = 0;
	int r = 0;

	while (!iscsi_tcp_segment_done(segment, 0, r)) {
		struct scatterlist *sg;
		unsigned int offset, copy;
		int flags = 0;

		r = 0;
		offset = segment->copied;
		copy = segment->size - offset;

		if (segment->total_copied + segment->size < segment->total_size)
			flags |= MSG_MORE;

		/* Use sendpage if we can; else fall back to sendmsg */
		if (!segment->data) {
			sg = segment->sg;
			offset += segment->sg_offset + sg->offset;
			r = tcp_conn->sendpage(sk, sg_page(sg), offset, copy,
					       flags);
		} else {
			struct msghdr msg = { .msg_flags = flags };
			struct kvec iov = {
				.iov_base = segment->data + offset,
				.iov_len = copy
			};

			r = kernel_sendmsg(sk, &msg, &iov, 1, copy);
		}

		if (r < 0) {
			iscsi_tcp_segment_unmap(segment);
			if (copied || r == -EAGAIN)
				break;
			return r;
		}
		copied += r;
	}
	return copied;
}

/**
 * iscsi_tcp_segment_recv - copy data to segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to copy to
O
Olaf Kirch 已提交
330 331 332 333 334 335 336 337 338 339 340 341 342 343
 * @ptr: data pointer
 * @len: amount of data available
 *
 * This function copies up to @len bytes to the
 * given buffer, and returns the number of bytes
 * consumed, which can actually be less than @len.
 *
 * If hash digest is enabled, the function will update the
 * hash while copying.
 * Combining these two operations doesn't buy us a lot (yet),
 * but in the future we could implement combined copy+crc,
 * just way we do for network layer checksums.
 */
static int
344 345 346
iscsi_tcp_segment_recv(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment, const void *ptr,
		       unsigned int len)
O
Olaf Kirch 已提交
347
{
348
	unsigned int copy = 0, copied = 0;
349

350 351 352 353 354
	while (!iscsi_tcp_segment_done(segment, 1, copy)) {
		if (copied == len) {
			debug_tcp("iscsi_tcp_segment_recv copied %d bytes\n",
				  len);
			break;
O
Olaf Kirch 已提交
355
		}
356 357 358 359

		copy = min(len - copied, segment->size - segment->copied);
		debug_tcp("iscsi_tcp_segment_recv copying %d\n", copy);
		memcpy(segment->data + segment->copied, ptr + copied, copy);
O
Olaf Kirch 已提交
360 361 362 363 364 365 366 367 368 369
		copied += copy;
	}
	return copied;
}

/*
 * Compute the digest of a header in one shot: wrap the @hdrlen bytes at
 * @hdr in a single-entry scatterlist and run @hash over it, storing the
 * result in @digest.
 */
static inline void
iscsi_tcp_dgst_header(struct hash_desc *hash, const void *hdr, size_t hdrlen,
		      unsigned char digest[ISCSI_DIGEST_SIZE])
{
	struct scatterlist hdr_sg;

	sg_init_one(&hdr_sg, hdr, hdrlen);
	crypto_hash_digest(hash, &hdr_sg, hdrlen, digest);
}

static inline int
iscsi_tcp_dgst_verify(struct iscsi_tcp_conn *tcp_conn,
377
		      struct iscsi_segment *segment)
O
Olaf Kirch 已提交
378
{
379
	if (!segment->digest_len)
O
Olaf Kirch 已提交
380 381
		return 1;

382 383
	if (memcmp(segment->recv_digest, segment->digest,
		   segment->digest_len)) {
O
Olaf Kirch 已提交
384 385 386 387 388 389 390 391
		debug_scsi("digest mismatch\n");
		return 0;
	}

	return 1;
}

/*
392
 * Helper function to set up segment buffer
O
Olaf Kirch 已提交
393 394
 */
static inline void
395 396
__iscsi_segment_init(struct iscsi_segment *segment, size_t size,
		     iscsi_segment_done_fn_t *done, struct hash_desc *hash)
O
Olaf Kirch 已提交
397
{
398 399 400
	memset(segment, 0, sizeof(*segment));
	segment->total_size = size;
	segment->done = done;
O
Olaf Kirch 已提交
401 402

	if (hash) {
403
		segment->hash = hash;
O
Olaf Kirch 已提交
404 405 406 407 408
		crypto_hash_init(hash);
	}
}

static inline void
409 410 411
iscsi_segment_init_linear(struct iscsi_segment *segment, void *data,
			  size_t size, iscsi_segment_done_fn_t *done,
			  struct hash_desc *hash)
O
Olaf Kirch 已提交
412
{
413 414 415
	__iscsi_segment_init(segment, size, done, hash);
	segment->data = data;
	segment->size = size;
O
Olaf Kirch 已提交
416 417 418
}

static inline int
419 420 421 422
iscsi_segment_seek_sg(struct iscsi_segment *segment,
		      struct scatterlist *sg_list, unsigned int sg_count,
		      unsigned int offset, size_t size,
		      iscsi_segment_done_fn_t *done, struct hash_desc *hash)
O
Olaf Kirch 已提交
423
{
424
	struct scatterlist *sg;
O
Olaf Kirch 已提交
425 426
	unsigned int i;

427 428 429 430 431 432 433 434
	debug_scsi("iscsi_segment_seek_sg offset %u size %llu\n",
		  offset, size);
	__iscsi_segment_init(segment, size, done, hash);
	for_each_sg(sg_list, sg, sg_count, i) {
		debug_scsi("sg %d, len %u offset %u\n", i, sg->length,
			   sg->offset);
		if (offset < sg->length) {
			iscsi_tcp_segment_init_sg(segment, sg, offset);
O
Olaf Kirch 已提交
435
			return 0;
436
		}
437
		offset -= sg->length;
438 439
	}

O
Olaf Kirch 已提交
440 441 442 443
	return ISCSI_ERR_DATA_OFFSET;
}

/**
444
 * iscsi_tcp_hdr_recv_prep - prep segment for hdr reception
O
Olaf Kirch 已提交
445 446 447 448 449 450 451 452 453 454 455
 * @tcp_conn: iscsi connection to prep for
 *
 * This function always passes NULL for the hash argument, because when this
 * function is called we do not yet know the final size of the header and want
 * to delay the digest processing until we know that.
 */
static void
iscsi_tcp_hdr_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	debug_tcp("iscsi_tcp_hdr_recv_prep(%p%s)\n", tcp_conn,
		  tcp_conn->iscsi_conn->hdrdgst_en ? ", digest enabled" : "");
456
	iscsi_segment_init_linear(&tcp_conn->in.segment,
O
Olaf Kirch 已提交
457 458 459 460 461 462 463 464 465
				tcp_conn->in.hdr_buf, sizeof(struct iscsi_hdr),
				iscsi_tcp_hdr_recv_done, NULL);
}

/*
 * Handle incoming reply to any other type of command
 */
static int
iscsi_tcp_data_recv_done(struct iscsi_tcp_conn *tcp_conn,
466
			 struct iscsi_segment *segment)
O
Olaf Kirch 已提交
467 468 469 470
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	int rc = 0;

471
	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
O
Olaf Kirch 已提交
472 473 474 475 476 477 478 479
		return ISCSI_ERR_DATA_DGST;

	rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr,
			conn->data, tcp_conn->in.datalen);
	if (rc)
		return rc;

	iscsi_tcp_hdr_recv_prep(tcp_conn);
480 481 482
	return 0;
}

O
Olaf Kirch 已提交
483 484 485 486 487 488 489 490 491
static void
iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct hash_desc *rx_hash = NULL;

	if (conn->datadgst_en)
		rx_hash = &tcp_conn->rx_hash;

492
	iscsi_segment_init_linear(&tcp_conn->in.segment,
O
Olaf Kirch 已提交
493 494 495 496
				conn->data, tcp_conn->in.datalen,
				iscsi_tcp_data_recv_done, rx_hash);
}

M
Mike Christie 已提交
497 498 499 500
/*
 * must be called with session lock
 */
static void
501
iscsi_tcp_cleanup_task(struct iscsi_conn *conn, struct iscsi_task *task)
502
{
503
	struct iscsi_tcp_task *tcp_task = task->dd_data;
504
	struct iscsi_r2t_info *r2t;
505

506 507
	/* nothing to do for mgmt tasks */
	if (!task->sc)
508 509
		return;

510 511 512
	/* flush task's r2t queues */
	while (__kfifo_get(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
513
			    sizeof(void*));
514
		debug_scsi("iscsi_tcp_cleanup_task pending r2t dropped\n");
515 516
	}

517
	r2t = tcp_task->r2t;
518
	if (r2t != NULL) {
519
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
520
			    sizeof(void*));
521
		tcp_task->r2t = NULL;
522
	}
523 524 525
}

/**
526
 * iscsi_data_in - SCSI Data-In Response processing
527
 * @conn: iscsi connection
528
 * @task: scsi command task
529 530
 **/
static int
531
iscsi_data_in(struct iscsi_conn *conn, struct iscsi_task *task)
532
{
533
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
534
	struct iscsi_tcp_task *tcp_task = task->dd_data;
535
	struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
536
	int datasn = be32_to_cpu(rhdr->datasn);
537
	unsigned total_in_length = scsi_in(task->sc)->length;
538

539
	iscsi_update_cmdsn(conn->session, (struct iscsi_nopin*)rhdr);
540
	if (tcp_conn->in.datalen == 0)
541 542
		return 0;

543 544
	if (tcp_task->exp_datasn != datasn) {
		debug_tcp("%s: task->exp_datasn(%d) != rhdr->datasn(%d)\n",
545
		          __func__, tcp_task->exp_datasn, datasn);
546
		return ISCSI_ERR_DATASN;
547
	}
548

549
	tcp_task->exp_datasn++;
550

551 552
	tcp_task->data_offset = be32_to_cpu(rhdr->offset);
	if (tcp_task->data_offset + tcp_conn->in.datalen > total_in_length) {
553
		debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n",
554
		          __func__, tcp_task->data_offset,
555
		          tcp_conn->in.datalen, total_in_length);
556
		return ISCSI_ERR_DATA_OFFSET;
557
	}
558 559 560 561 562 563 564 565

	conn->datain_pdus_cnt++;
	return 0;
}

/**
 * iscsi_solicit_data_init - initialize first Data-Out
 * @conn: iscsi connection
566
 * @task: scsi command task
567 568 569 570 571 572 573 574 575
 * @r2t: R2T info
 *
 * Notes:
 *	Initialize first Data-Out within this R2T sequence and finds
 *	proper data_offset within this SCSI command.
 *
 *	This function is called with connection lock taken.
 **/
static void
576
iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_task *task,
577 578 579 580
			struct iscsi_r2t_info *r2t)
{
	struct iscsi_data *hdr;

581
	hdr = &r2t->dtask.hdr;
582 583 584 585 586
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
587 588
	memcpy(hdr->lun, task->hdr->lun, sizeof(hdr->lun));
	hdr->itt = task->hdr->itt;
589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607
	hdr->exp_statsn = r2t->exp_statsn;
	hdr->offset = cpu_to_be32(r2t->data_offset);
	if (r2t->data_length > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
		hdr->flags = 0;
	} else {
		hton24(hdr->dlength, r2t->data_length);
		r2t->data_count = r2t->data_length;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}
	conn->dataout_pdus_cnt++;

	r2t->sent = 0;
}

/**
 * iscsi_r2t_rsp - iSCSI R2T Response processing
 * @conn: iscsi connection
608
 * @task: scsi command task
609 610
 **/
static int
611
iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
612 613 614
{
	struct iscsi_r2t_info *r2t;
	struct iscsi_session *session = conn->session;
615
	struct iscsi_tcp_task *tcp_task = task->dd_data;
616 617
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
618 619 620
	int r2tsn = be32_to_cpu(rhdr->r2tsn);
	int rc;

621
	if (tcp_conn->in.datalen) {
622 623 624
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2t with datalen %d\n",
				  tcp_conn->in.datalen);
625
		return ISCSI_ERR_DATALEN;
626
	}
627

628 629
	if (tcp_task->exp_datasn != r2tsn){
		debug_tcp("%s: task->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
630
		          __func__, tcp_task->exp_datasn, r2tsn);
631
		return ISCSI_ERR_R2TSN;
632
	}
633

634
	/* fill-in new R2T associated with the task */
635 636
	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);

637
	if (!task->sc || session->state != ISCSI_STATE_LOGGED_IN) {
638 639
		iscsi_conn_printk(KERN_INFO, conn,
				  "dropping R2T itt %d in recovery.\n",
640
				  task->itt);
641 642
		return 0;
	}
643

644
	rc = __kfifo_get(tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
645 646 647 648
	BUG_ON(!rc);

	r2t->exp_statsn = rhdr->statsn;
	r2t->data_length = be32_to_cpu(rhdr->data_length);
649
	if (r2t->data_length == 0) {
650 651
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with zero data len\n");
652
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
653
			    sizeof(void*));
654 655 656
		return ISCSI_ERR_DATALEN;
	}

657 658 659 660 661
	if (r2t->data_length > session->max_burst)
		debug_scsi("invalid R2T with data len %u and max burst %u."
			   "Attempting to execute request.\n",
			    r2t->data_length, session->max_burst);

662
	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
663
	if (r2t->data_offset + r2t->data_length > scsi_out(task->sc)->length) {
664 665 666
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with data len %u at offset %u "
				  "and total length %d\n", r2t->data_length,
667 668
				  r2t->data_offset, scsi_out(task->sc)->length);
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
669
			    sizeof(void*));
670 671 672 673 674 675
		return ISCSI_ERR_DATALEN;
	}

	r2t->ttt = rhdr->ttt; /* no flip */
	r2t->solicit_datasn = 0;

676
	iscsi_solicit_data_init(conn, task, r2t);
677

678 679
	tcp_task->exp_datasn = r2tsn + 1;
	__kfifo_put(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
680
	conn->r2t_pdus_cnt++;
681

682
	iscsi_requeue_task(task);
683 684 685
	return 0;
}

O
Olaf Kirch 已提交
686 687 688 689 690
/*
 * Handle incoming reply to DataIn command
 */
static int
iscsi_tcp_process_data_in(struct iscsi_tcp_conn *tcp_conn,
691
			  struct iscsi_segment *segment)
O
Olaf Kirch 已提交
692 693 694 695 696
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr = tcp_conn->in.hdr;
	int rc;

697
	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
O
Olaf Kirch 已提交
698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720
		return ISCSI_ERR_DATA_DGST;

	/* check for non-exceptional status */
	if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
		rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
		if (rc)
			return rc;
	}

	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

/**
 * iscsi_tcp_hdr_dissect - process PDU header
 * @conn: iSCSI connection
 * @hdr: PDU header
 *
 * This function analyzes the header of the PDU received,
 * and performs several sanity checks. If the PDU is accompanied
 * by data, the receive buffer is set up to copy the incoming data
 * to the correct location.
 */
721
static int
O
Olaf Kirch 已提交
722
iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
723
{
724 725
	int rc = 0, opcode, ahslen;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
726
	struct iscsi_task *task;
727 728

	/* verify PDU length */
729 730
	tcp_conn->in.datalen = ntoh24(hdr->dlength);
	if (tcp_conn->in.datalen > conn->max_recv_dlength) {
731 732 733
		iscsi_conn_printk(KERN_ERR, conn,
				  "iscsi_tcp: datalen %d > %d\n",
				  tcp_conn->in.datalen, conn->max_recv_dlength);
734 735 736
		return ISCSI_ERR_DATALEN;
	}

O
Olaf Kirch 已提交
737 738 739
	/* Additional header segments. So far, we don't
	 * process additional headers.
	 */
740
	ahslen = hdr->hlength << 2;
741

742
	opcode = hdr->opcode & ISCSI_OPCODE_MASK;
743
	/* verify itt (itt encoding: age+cid+itt) */
744
	rc = iscsi_verify_itt(conn, hdr->itt);
745
	if (rc)
746
		return rc;
747

O
Olaf Kirch 已提交
748 749
	debug_tcp("opcode 0x%x ahslen %d datalen %d\n",
		  opcode, ahslen, tcp_conn->in.datalen);
750

751 752
	switch(opcode) {
	case ISCSI_OP_SCSI_DATA_IN:
753
		spin_lock(&conn->session->lock);
754 755
		task = iscsi_itt_to_ctask(conn, hdr->itt);
		if (!task)
756 757
			rc = ISCSI_ERR_BAD_ITT;
		else
758
			rc = iscsi_data_in(conn, task);
759 760 761 762
		if (rc) {
			spin_unlock(&conn->session->lock);
			break;
		}
763

O
Olaf Kirch 已提交
764
		if (tcp_conn->in.datalen) {
765
			struct iscsi_tcp_task *tcp_task = task->dd_data;
O
Olaf Kirch 已提交
766
			struct hash_desc *rx_hash = NULL;
767
			struct scsi_data_buffer *sdb = scsi_in(task->sc);
O
Olaf Kirch 已提交
768 769 770 771

			/*
			 * Setup copy of Data-In into the Scsi_Cmnd
			 * Scatterlist case:
772
			 * We set up the iscsi_segment to point to the next
O
Olaf Kirch 已提交
773 774 775 776 777 778 779 780 781
			 * scatterlist entry to copy to. As we go along,
			 * we move on to the next scatterlist entry and
			 * update the digest per-entry.
			 */
			if (conn->datadgst_en)
				rx_hash = &tcp_conn->rx_hash;

			debug_tcp("iscsi_tcp_begin_data_in(%p, offset=%d, "
				  "datalen=%d)\n", tcp_conn,
782
				  tcp_task->data_offset,
O
Olaf Kirch 已提交
783
				  tcp_conn->in.datalen);
784 785 786 787 788 789 790 791 792
			rc = iscsi_segment_seek_sg(&tcp_conn->in.segment,
						   sdb->table.sgl,
						   sdb->table.nents,
						   tcp_task->data_offset,
						   tcp_conn->in.datalen,
						   iscsi_tcp_process_data_in,
						   rx_hash);
			spin_unlock(&conn->session->lock);
			return rc;
O
Olaf Kirch 已提交
793
		}
794 795 796
		rc = __iscsi_complete_pdu(conn, hdr, NULL, 0);
		spin_unlock(&conn->session->lock);
		break;
797
	case ISCSI_OP_SCSI_CMD_RSP:
O
Olaf Kirch 已提交
798 799 800 801 802
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
803 804
		break;
	case ISCSI_OP_R2T:
805
		spin_lock(&conn->session->lock);
806 807
		task = iscsi_itt_to_ctask(conn, hdr->itt);
		if (!task)
808 809
			rc = ISCSI_ERR_BAD_ITT;
		else if (ahslen)
810
			rc = ISCSI_ERR_AHSLEN;
811
		else if (task->sc->sc_data_direction == DMA_TO_DEVICE)
812
			rc = iscsi_r2t_rsp(conn, task);
813
		else
814
			rc = ISCSI_ERR_PROTO;
815
		spin_unlock(&conn->session->lock);
816 817 818 819 820
		break;
	case ISCSI_OP_LOGIN_RSP:
	case ISCSI_OP_TEXT_RSP:
	case ISCSI_OP_REJECT:
	case ISCSI_OP_ASYNC_EVENT:
821 822 823 824 825
		/*
		 * It is possible that we could get a PDU with a buffer larger
		 * than 8K, but there are no targets that currently do this.
		 * For now we fail until we find a vendor that needs it
		 */
O
Olaf Kirch 已提交
826
		if (ISCSI_DEF_MAX_RECV_SEG_LEN < tcp_conn->in.datalen) {
827 828 829 830 831 832
			iscsi_conn_printk(KERN_ERR, conn,
					  "iscsi_tcp: received buffer of "
					  "len %u but conn buffer is only %u "
					  "(opcode %0x)\n",
					  tcp_conn->in.datalen,
					  ISCSI_DEF_MAX_RECV_SEG_LEN, opcode);
833 834 835 836
			rc = ISCSI_ERR_PROTO;
			break;
		}

O
Olaf Kirch 已提交
837 838 839 840 841 842 843
		/* If there's data coming in with the response,
		 * receive it to the connection's buffer.
		 */
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
844
	/* fall through */
845 846
	case ISCSI_OP_LOGOUT_RSP:
	case ISCSI_OP_NOOP_IN:
847 848 849 850 851 852 853
	case ISCSI_OP_SCSI_TMFUNC_RSP:
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
		break;
	default:
		rc = ISCSI_ERR_BAD_OPCODE;
		break;
	}
854

O
Olaf Kirch 已提交
855 856 857 858 859 860
	if (rc == 0) {
		/* Anything that comes with data should have
		 * been handled above. */
		if (tcp_conn->in.datalen)
			return ISCSI_ERR_PROTO;
		iscsi_tcp_hdr_recv_prep(tcp_conn);
861 862
	}

O
Olaf Kirch 已提交
863
	return rc;
864 865
}

O
Olaf Kirch 已提交
866 867 868 869 870 871 872 873 874
/**
 * iscsi_tcp_hdr_recv_done - process PDU header
 *
 * This is the callback invoked when the PDU header has
 * been received. If the header is followed by additional
 * header segments, we go back for more data.
 */
static int
iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
875
			struct iscsi_segment *segment)
876
{
O
Olaf Kirch 已提交
877 878
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr;
879

O
Olaf Kirch 已提交
880 881 882 883 884
	/* Check if there are additional header segments
	 * *prior* to computing the digest, because we
	 * may need to go back to the caller for more.
	 */
	hdr = (struct iscsi_hdr *) tcp_conn->in.hdr_buf;
885
	if (segment->copied == sizeof(struct iscsi_hdr) && hdr->hlength) {
O
Olaf Kirch 已提交
886 887 888 889 890 891 892 893 894
		/* Bump the header length - the caller will
		 * just loop around and get the AHS for us, and
		 * call again. */
		unsigned int ahslen = hdr->hlength << 2;

		/* Make sure we don't overflow */
		if (sizeof(*hdr) + ahslen > sizeof(tcp_conn->in.hdr_buf))
			return ISCSI_ERR_AHSLEN;

895 896
		segment->total_size += ahslen;
		segment->size += ahslen;
O
Olaf Kirch 已提交
897
		return 0;
898 899
	}

O
Olaf Kirch 已提交
900 901 902 903
	/* We're done processing the header. See if we're doing
	 * header digests; if so, set up the recv_digest buffer
	 * and go back for more. */
	if (conn->hdrdgst_en) {
904 905 906
		if (segment->digest_len == 0) {
			iscsi_tcp_segment_splice_digest(segment,
							segment->recv_digest);
O
Olaf Kirch 已提交
907
			return 0;
908
		}
O
Olaf Kirch 已提交
909
		iscsi_tcp_dgst_header(&tcp_conn->rx_hash, hdr,
910 911
				      segment->total_copied - ISCSI_DIGEST_SIZE,
				      segment->digest);
912

913
		if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
O
Olaf Kirch 已提交
914
			return ISCSI_ERR_HDR_DGST;
915
	}
O
Olaf Kirch 已提交
916 917 918

	tcp_conn->in.hdr = hdr;
	return iscsi_tcp_hdr_dissect(conn, hdr);
919 920 921
}

/**
O
Olaf Kirch 已提交
922
 * iscsi_tcp_recv - TCP receive in sendfile fashion
923 924 925 926 927 928
 * @rd_desc: read descriptor
 * @skb: socket buffer
 * @offset: offset in skb
 * @len: skb->len - offset
 **/
static int
O
Olaf Kirch 已提交
929 930
iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
	       unsigned int offset, size_t len)
931 932
{
	struct iscsi_conn *conn = rd_desc->arg.data;
933
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
934
	struct iscsi_segment *segment = &tcp_conn->in.segment;
O
Olaf Kirch 已提交
935 936 937
	struct skb_seq_state seq;
	unsigned int consumed = 0;
	int rc = 0;
938

O
Olaf Kirch 已提交
939
	debug_tcp("in %d bytes\n", skb->len - offset);
940 941 942 943 944 945

	if (unlikely(conn->suspend_rx)) {
		debug_tcp("conn %d Rx suspended!\n", conn->id);
		return 0;
	}

O
Olaf Kirch 已提交
946 947 948 949
	skb_prepare_seq_read(skb, offset, skb->len, &seq);
	while (1) {
		unsigned int avail;
		const u8 *ptr;
950

O
Olaf Kirch 已提交
951
		avail = skb_seq_read(consumed, &ptr, &seq);
952 953 954
		if (avail == 0) {
			debug_tcp("no more data avail. Consumed %d\n",
				  consumed);
O
Olaf Kirch 已提交
955
			break;
956 957
		}
		BUG_ON(segment->copied >= segment->size);
O
Olaf Kirch 已提交
958 959

		debug_tcp("skb %p ptr=%p avail=%u\n", skb, ptr, avail);
960
		rc = iscsi_tcp_segment_recv(tcp_conn, segment, ptr, avail);
O
Olaf Kirch 已提交
961 962 963
		BUG_ON(rc == 0);
		consumed += rc;

964 965 966
		if (segment->total_copied >= segment->total_size) {
			debug_tcp("segment done\n");
			rc = segment->done(tcp_conn, segment);
O
Olaf Kirch 已提交
967 968 969
			if (rc != 0) {
				skb_abort_seq_read(&seq);
				goto error;
970
			}
971

O
Olaf Kirch 已提交
972
			/* The done() functions sets up the
973
			 * next segment. */
974 975
		}
	}
976
	skb_abort_seq_read(&seq);
O
Olaf Kirch 已提交
977 978
	conn->rxdata_octets += consumed;
	return consumed;
979

O
Olaf Kirch 已提交
980 981
error:
	debug_tcp("Error receiving PDU, errno=%d\n", rc);
982
	iscsi_conn_failure(conn, rc);
O
Olaf Kirch 已提交
983
	return 0;
984 985 986 987 988 989
}

static void
iscsi_tcp_data_ready(struct sock *sk, int flag)
{
	struct iscsi_conn *conn = sk->sk_user_data;
O
Olaf Kirch 已提交
990
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
991 992 993 994
	read_descriptor_t rd_desc;

	read_lock(&sk->sk_callback_lock);

995
	/*
O
Olaf Kirch 已提交
996
	 * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
997
	 * We set count to 1 because we want the network layer to
O
Olaf Kirch 已提交
998
	 * hand us all the skbs that are available. iscsi_tcp_recv
999 1000
	 * handled pdus that cross buffers or pdus that still need data.
	 */
1001
	rd_desc.arg.data = conn;
1002
	rd_desc.count = 1;
O
Olaf Kirch 已提交
1003
	tcp_read_sock(sk, &rd_desc, iscsi_tcp_recv);
1004 1005

	read_unlock(&sk->sk_callback_lock);
O
Olaf Kirch 已提交
1006 1007 1008

	/* If we had to (atomically) map a highmem page,
	 * unmap it now. */
1009
	iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
1010 1011 1012 1013 1014
}

static void
iscsi_tcp_state_change(struct sock *sk)
{
1015
	struct iscsi_tcp_conn *tcp_conn;
1016 1017 1018 1019 1020 1021 1022 1023 1024
	struct iscsi_conn *conn;
	struct iscsi_session *session;
	void (*old_state_change)(struct sock *);

	read_lock(&sk->sk_callback_lock);

	conn = (struct iscsi_conn*)sk->sk_user_data;
	session = conn->session;

M
Mike Christie 已提交
1025 1026 1027
	if ((sk->sk_state == TCP_CLOSE_WAIT ||
	     sk->sk_state == TCP_CLOSE) &&
	    !atomic_read(&sk->sk_rmem_alloc)) {
1028 1029 1030 1031
		debug_tcp("iscsi_tcp_state_change: TCP_CLOSE|TCP_CLOSE_WAIT\n");
		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	}

1032 1033
	tcp_conn = conn->dd_data;
	old_state_change = tcp_conn->old_state_change;
1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047

	read_unlock(&sk->sk_callback_lock);

	old_state_change(sk);
}

/**
 * iscsi_write_space - Called when more output buffer space is available
 * @sk: socket space is available for
 **/
static void
iscsi_write_space(struct sock *sk)
{
	struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
1048 1049 1050
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	tcp_conn->old_write_space(sk);
1051
	debug_tcp("iscsi_write_space: cid %d\n", conn->id);
1052
	scsi_queue_work(conn->session->host, &conn->xmitwork);
1053 1054 1055 1056 1057
}

static void
iscsi_conn_set_callbacks(struct iscsi_conn *conn)
{
1058 1059
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct sock *sk = tcp_conn->sock->sk;
1060 1061 1062 1063

	/* assign new callbacks */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = conn;
1064 1065 1066
	tcp_conn->old_data_ready = sk->sk_data_ready;
	tcp_conn->old_state_change = sk->sk_state_change;
	tcp_conn->old_write_space = sk->sk_write_space;
1067 1068 1069 1070 1071 1072 1073
	sk->sk_data_ready = iscsi_tcp_data_ready;
	sk->sk_state_change = iscsi_tcp_state_change;
	sk->sk_write_space = iscsi_write_space;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void
1074
iscsi_conn_restore_callbacks(struct iscsi_tcp_conn *tcp_conn)
1075
{
1076
	struct sock *sk = tcp_conn->sock->sk;
1077 1078 1079 1080

	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data    = NULL;
1081 1082 1083
	sk->sk_data_ready   = tcp_conn->old_data_ready;
	sk->sk_state_change = tcp_conn->old_state_change;
	sk->sk_write_space  = tcp_conn->old_write_space;
1084 1085 1086 1087 1088
	sk->sk_no_check	 = 0;
	write_unlock_bh(&sk->sk_callback_lock);
}

/**
1089 1090 1091 1092
 * iscsi_xmit - TCP transmit
 **/
static int
iscsi_xmit(struct iscsi_conn *conn)
1093
{
1094
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1095 1096 1097
	struct iscsi_segment *segment = &tcp_conn->out.segment;
	unsigned int consumed = 0;
	int rc = 0;
1098

1099 1100
	while (1) {
		rc = iscsi_tcp_xmit_segment(tcp_conn, segment);
1101 1102
		if (rc < 0) {
			rc = ISCSI_ERR_XMIT_FAILED;
1103
			goto error;
1104
		}
1105 1106 1107 1108 1109 1110 1111 1112
		if (rc == 0)
			break;

		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			if (segment->done != NULL) {
				rc = segment->done(tcp_conn, segment);
1113
				if (rc != 0)
1114 1115 1116
					goto error;
			}
		}
1117 1118
	}

1119 1120 1121 1122 1123 1124 1125 1126 1127
	debug_tcp("xmit %d bytes\n", consumed);

	conn->txdata_octets += consumed;
	return consumed;

error:
	/* Transmit error. We could initiate error recovery
	 * here. */
	debug_tcp("Error sending PDU, errno=%d\n", rc);
1128 1129
	iscsi_conn_failure(conn, rc);
	return -EIO;
1130 1131 1132
}

/**
1133 1134
 * iscsi_tcp_xmit_qlen - return the number of bytes queued for xmit
 */
1135
static inline int
1136
iscsi_tcp_xmit_qlen(struct iscsi_conn *conn)
1137
{
1138 1139
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->out.segment;
1140

1141
	return segment->total_copied - segment->total_size;
1142 1143 1144
}

static inline int
1145
iscsi_tcp_flush(struct iscsi_conn *conn)
1146
{
1147 1148 1149 1150 1151
	int rc;

	while (iscsi_tcp_xmit_qlen(conn)) {
		rc = iscsi_xmit(conn);
		if (rc == 0)
1152
			return -EAGAIN;
1153 1154
		if (rc < 0)
			return rc;
1155
	}
1156

1157
	return 0;
1158 1159
}

1160 1161 1162 1163 1164 1165 1166
/*
 * This is called when we're done sending the header.
 * Simply copy the data_segment to the send segment, and return.
 */
static int
iscsi_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn,
			struct iscsi_segment *segment)
1167
{
1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178
	tcp_conn->out.segment = tcp_conn->out.data_segment;
	debug_tcp("Header done. Next segment size %u total_size %u\n",
		  tcp_conn->out.segment.size, tcp_conn->out.segment.total_size);
	return 0;
}

/*
 * iscsi_tcp_send_hdr_prep - set up the outgoing segment for a PDU header
 * @conn: iscsi connection
 * @hdr: header buffer
 * @hdrlen: header length in bytes, without digest
 *
 * Resets the data segment, appends the header digest when enabled and
 * initializes the outgoing segment over the header with
 * iscsi_tcp_send_hdr_done() as its completion callback.
 */
static void
iscsi_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr, size_t hdrlen)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	size_t wire_len = hdrlen;

	debug_tcp("%s(%p%s)\n", __func__, tcp_conn,
			conn->hdrdgst_en? ", digest enabled" : "");

	/* Start with an empty data segment; the caller fills it in
	 * afterwards through iscsi_tcp_send_data_prep(). */
	memset(&tcp_conn->out.data_segment, 0,
	       sizeof(tcp_conn->out.data_segment));

	/* With header digests on, the CRC is written directly behind
	 * the header in the same buffer, so that buffer has to provide
	 * ISCSI_DIGEST_SIZE extra bytes. */
	if (conn->hdrdgst_en) {
		iscsi_tcp_dgst_header(&tcp_conn->tx_hash, hdr, hdrlen,
				      hdr + hdrlen);
		wire_len += ISCSI_DIGEST_SIZE;
	}

	/* Keep the header pointer around; the data-prep routines read
	 * its dlength field later. */
	tcp_conn->out.hdr = hdr;

	iscsi_segment_init_linear(&tcp_conn->out.segment, hdr, wire_len,
				  iscsi_tcp_send_hdr_done, NULL);
}

/*
 * iscsi_tcp_send_data_prep - attach scatterlist payload to the next PDU
 * @conn: iscsi connection
 * @sg: scatterlist holding the payload
 * @count: number of scatterlist entries
 * @offset: byte offset into the scatterlist
 * @len: payload length in bytes
 *
 * Positions the data segment inside the scatterlist via
 * iscsi_segment_seek_sg() and returns that function's result. The
 * transmit hash is passed along only when data digests are enabled.
 */
static int
iscsi_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg,
			 unsigned int count, unsigned int offset,
			 unsigned int len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *digest;
	unsigned int announced;

	debug_tcp("%s(%p, offset=%d, datalen=%d%s)\n", __func__,
			tcp_conn, offset, len,
			conn->datadgst_en? ", digest enabled" : "");

	/* Warn if the padded length differs from what the already
	 * prepared header announces. */
	announced = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(announced));

	digest = conn->datadgst_en ? &tcp_conn->tx_hash : NULL;

	return iscsi_segment_seek_sg(&tcp_conn->out.data_segment,
				     sg, count, offset, len,
				     NULL, digest);
}

/*
 * iscsi_tcp_send_linear_data_prepare - attach a linear payload buffer
 * @conn: iscsi connection
 * @data: payload buffer
 * @len: payload length in bytes
 *
 * Initializes the data segment over @data. The transmit hash is handed
 * to the segment only when data digests are enabled.
 */
static void
iscsi_tcp_send_linear_data_prepare(struct iscsi_conn *conn, void *data,
				   size_t len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

	/* len is a size_t, so it has to be printed with %zu, not %d */
	debug_tcp("%s(%p, datalen=%zu%s)\n", __func__, tcp_conn, len,
		  conn->datadgst_en? ", digest enabled" : "");

	/* Make sure the datalen matches what the caller
	   said he would send. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
		tx_hash = &tcp_conn->tx_hash;

	iscsi_segment_init_linear(&tcp_conn->out.data_segment,
				data, len, NULL, tx_hash);
}

/**
 * iscsi_solicit_data_cont - initialize next Data-Out
 * @conn: iscsi connection
1263
 * @task: scsi command task
1264 1265 1266 1267 1268 1269 1270 1271 1272
 * @r2t: R2T info
 * @left: bytes left to transfer
 *
 * Notes:
 *	Initialize next Data-Out within this R2T sequence and continue
 *	to process next Scatter-Gather element(if any) of this SCSI command.
 *
 *	Called under connection lock.
 **/
1273
static int
1274
iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_task *task,
1275
			struct iscsi_r2t_info *r2t)
1276 1277
{
	struct iscsi_data *hdr;
1278 1279 1280 1281 1282 1283
	int new_offset, left;

	BUG_ON(r2t->data_length - r2t->sent < 0);
	left = r2t->data_length - r2t->sent;
	if (left == 0)
		return 0;
1284

1285
	hdr = &r2t->dtask.hdr;
1286 1287 1288 1289 1290
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
1291 1292
	memcpy(hdr->lun, task->hdr->lun, sizeof(hdr->lun));
	hdr->itt = task->hdr->itt;
1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304
	hdr->exp_statsn = r2t->exp_statsn;
	new_offset = r2t->data_offset + r2t->sent;
	hdr->offset = cpu_to_be32(new_offset);
	if (left > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
	} else {
		hton24(hdr->dlength, left);
		r2t->data_count = left;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}

1305 1306
	conn->dataout_pdus_cnt++;
	return 1;
1307 1308 1309
}

/**
1310
 * iscsi_tcp_task - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
1311
 * @conn: iscsi connection
1312
 * @task: scsi command task
1313 1314
 * @sc: scsi command
 **/
1315
static int
1316
iscsi_tcp_task_init(struct iscsi_task *task)
1317
{
1318 1319 1320
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct iscsi_conn *conn = task->conn;
	struct scsi_cmnd *sc = task->sc;
1321
	int err;
1322

1323 1324
	if (!sc) {
		/*
1325
		 * mgmt tasks do not have a scatterlist since they come
1326 1327
		 * in from the iscsi interface.
		 */
1328 1329
		debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id,
			   task->itt);
1330 1331

		/* Prepare PDU, optionally w/ immediate data */
1332
		iscsi_tcp_send_hdr_prep(conn, task->hdr, sizeof(*task->hdr));
1333 1334

		/* If we have immediate data, attach a payload */
1335 1336 1337
		if (task->data_count)
			iscsi_tcp_send_linear_data_prepare(conn, task->data,
							   task->data_count);
1338 1339 1340
		return 0;
	}

1341 1342 1343
	BUG_ON(__kfifo_len(tcp_task->r2tqueue));
	tcp_task->sent = 0;
	tcp_task->exp_datasn = 0;
1344 1345

	/* Prepare PDU, optionally w/ immediate data */
1346 1347 1348 1349
	debug_scsi("task deq [cid %d itt 0x%x imm %d unsol %d]\n",
		    conn->id, task->itt, task->imm_count,
		    task->unsol_count);
	iscsi_tcp_send_hdr_prep(conn, task->hdr, task->hdr_len);
1350

1351
	if (!task->imm_count)
1352 1353 1354
		return 0;

	/* If we have immediate data, attach a payload */
1355 1356
	err = iscsi_tcp_send_data_prep(conn, scsi_out(sc)->table.sgl,
				       scsi_out(sc)->table.nents,
1357
				       0, task->imm_count);
1358 1359
	if (err)
		return err;
1360 1361
	tcp_task->sent += task->imm_count;
	task->imm_count = 0;
1362
	return 0;
1363 1364
}

1365
/*
1366 1367
 * iscsi_tcp_task_xmit - xmit normal PDU task
 * @task: iscsi command task
1368 1369 1370 1371 1372
 *
 * We're expected to return 0 when everything was transmitted succesfully,
 * -EAGAIN if there's still data in the queue, or != 0 for any other kind
 * of error.
 */
1373
static int
1374
iscsi_tcp_task_xmit(struct iscsi_task *task)
1375
{
1376 1377 1378
	struct iscsi_conn *conn = task->conn;
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct scsi_cmnd *sc = task->sc;
1379
	struct scsi_data_buffer *sdb;
1380
	int rc = 0;
1381

1382 1383 1384 1385
flush:
	/* Flush any pending data first. */
	rc = iscsi_tcp_flush(conn);
	if (rc < 0)
1386 1387
		return rc;

1388 1389
	/* mgmt command */
	if (!sc) {
1390 1391
		if (task->hdr->itt == RESERVED_ITT)
			iscsi_put_task(task);
1392 1393 1394
		return 0;
	}

1395 1396 1397
	/* Are we done already? */
	if (sc->sc_data_direction != DMA_TO_DEVICE)
		return 0;
1398

1399
	sdb = scsi_out(sc);
1400 1401
	if (task->unsol_count != 0) {
		struct iscsi_data *hdr = &tcp_task->unsol_dtask.hdr;
1402

1403 1404
		/* Prepare a header for the unsolicited PDU.
		 * The amount of data we want to send will be
1405
		 * in task->data_count.
1406 1407
		 * FIXME: return the data count instead.
		 */
1408
		iscsi_prep_unsolicit_data_pdu(task, hdr);
1409

1410
		debug_tcp("unsol dout [itt 0x%x doff %d dlen %d]\n",
1411
				task->itt, tcp_task->sent, task->data_count);
1412

1413
		iscsi_tcp_send_hdr_prep(conn, hdr, sizeof(*hdr));
1414
		rc = iscsi_tcp_send_data_prep(conn, sdb->table.sgl,
1415 1416
					      sdb->table.nents, tcp_task->sent,
					      task->data_count);
1417
		if (rc)
1418
			goto fail;
1419 1420
		tcp_task->sent += task->data_count;
		task->unsol_count -= task->data_count;
1421 1422 1423 1424
		goto flush;
	} else {
		struct iscsi_session *session = conn->session;
		struct iscsi_r2t_info *r2t;
1425

1426
		/* All unsolicited PDUs sent. Check for solicited PDUs.
1427
		 */
1428
		spin_lock_bh(&session->lock);
1429
		r2t = tcp_task->r2t;
1430 1431
		if (r2t != NULL) {
			/* Continue with this R2T? */
1432
			if (!iscsi_solicit_data_cont(conn, task, r2t)) {
1433 1434
				debug_scsi("  done with r2t %p\n", r2t);

1435
				__kfifo_put(tcp_task->r2tpool.queue,
1436
					    (void*)&r2t, sizeof(void*));
1437
				tcp_task->r2t = r2t = NULL;
1438
			}
1439 1440
		}

1441
		if (r2t == NULL) {
1442
			__kfifo_get(tcp_task->r2tqueue, (void*)&tcp_task->r2t,
1443
				    sizeof(void*));
1444
			r2t = tcp_task->r2t;
1445
		}
1446
		spin_unlock_bh(&session->lock);
1447

1448 1449 1450 1451
		/* Waiting for more R2Ts to arrive. */
		if (r2t == NULL) {
			debug_tcp("no R2Ts yet\n");
			return 0;
1452 1453
		}

1454
		debug_scsi("sol dout %p [dsn %d itt 0x%x doff %d dlen %d]\n",
1455
			r2t, r2t->solicit_datasn - 1, task->itt,
1456
			r2t->data_offset + r2t->sent, r2t->data_count);
1457

1458 1459
		iscsi_tcp_send_hdr_prep(conn, &r2t->dtask.hdr,
					sizeof(struct iscsi_hdr));
1460

1461 1462
		rc = iscsi_tcp_send_data_prep(conn, sdb->table.sgl,
					      sdb->table.nents,
1463 1464
					      r2t->data_offset + r2t->sent,
					      r2t->data_count);
1465
		if (rc)
1466
			goto fail;
1467
		tcp_task->sent += r2t->data_count;
1468 1469
		r2t->sent += r2t->data_count;
		goto flush;
1470 1471
	}
	return 0;
1472 1473 1474
fail:
	iscsi_conn_failure(conn, rc);
	return -EIO;
1475 1476
}

1477 1478
/*
 * iscsi_tcp_conn_create - allocate a connection with its crc32c hashes
 * @cls_session: session the connection belongs to
 * @conn_idx: connection index, passed on to iscsi_conn_setup()
 *
 * Returns the new class connection, or NULL when iscsi_conn_setup()
 * fails or either crc32c hash cannot be allocated.
 */
static struct iscsi_cls_conn *
iscsi_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
{
	struct iscsi_cls_conn *cls_conn;
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_conn *conn;

	cls_conn = iscsi_conn_setup(cls_session, sizeof(*tcp_conn), conn_idx);
	if (cls_conn == NULL)
		return NULL;

	conn = cls_conn->dd_data;
	/*
	 * due to strange issues with iser these are not set
	 * in iscsi_conn_setup
	 */
	conn->max_recv_dlength = ISCSI_DEF_MAX_RECV_SEG_LEN;

	tcp_conn = conn->dd_data;
	tcp_conn->iscsi_conn = conn;

	tcp_conn->tx_hash.flags = 0;
	tcp_conn->tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						  CRYPTO_ALG_ASYNC);
	if (!IS_ERR(tcp_conn->tx_hash.tfm)) {
		tcp_conn->rx_hash.flags = 0;
		tcp_conn->rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
							  CRYPTO_ALG_ASYNC);
		if (!IS_ERR(tcp_conn->rx_hash.tfm))
			return cls_conn;

		/* rx allocation failed: give the tx hash back */
		crypto_free_hash(tcp_conn->tx_hash.tfm);
	}

	iscsi_conn_printk(KERN_ERR, conn,
			  "Could not create connection due to crc32c "
			  "loading error. Make sure the crc32c "
			  "module is built as a module or into the "
			  "kernel\n");
	iscsi_conn_teardown(cls_conn);
	return NULL;
}

1523 1524 1525
static void
iscsi_tcp_release_conn(struct iscsi_conn *conn)
{
1526
	struct iscsi_session *session = conn->session;
1527
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1528
	struct socket *sock = tcp_conn->sock;
1529

1530
	if (!sock)
1531 1532
		return;

1533
	sock_hold(sock->sk);
1534
	iscsi_conn_restore_callbacks(tcp_conn);
1535
	sock_put(sock->sk);
1536

1537
	spin_lock_bh(&session->lock);
1538
	tcp_conn->sock = NULL;
1539 1540
	spin_unlock_bh(&session->lock);
	sockfd_put(sock);
1541 1542
}

1543
static void
1544
iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
1545
{
1546 1547
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1548

1549
	iscsi_tcp_release_conn(conn);
1550

P
Pete Wyckoff 已提交
1551 1552 1553 1554
	if (tcp_conn->tx_hash.tfm)
		crypto_free_hash(tcp_conn->tx_hash.tfm);
	if (tcp_conn->rx_hash.tfm)
		crypto_free_hash(tcp_conn->rx_hash.tfm);
1555

1556
	iscsi_conn_teardown(cls_conn);
1557
}
1558

1559 1560 1561 1562
static void
iscsi_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	/* userspace may have goofed up and not bound us */
	if (!tcp_conn->sock)
		return;
	/*
	 * Make sure our recv side is stopped.
	 * Older tools called conn stop before ep_disconnect
	 * so IO could still be coming in.
	 */
	write_lock_bh(&tcp_conn->sock->sk->sk_callback_lock);
	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
	write_unlock_bh(&tcp_conn->sock->sk->sk_callback_lock);
1576 1577 1578 1579 1580

	iscsi_conn_stop(cls_conn, flag);
	iscsi_tcp_release_conn(conn);
}

1581 1582 1583 1584 1585 1586 1587 1588 1589 1590
static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
			      char *buf, int *port,
			      int (*getname)(struct socket *, struct sockaddr *,
					int *addrlen))
{
	struct sockaddr_storage *addr;
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	int rc = 0, len;

1591
	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620
	if (!addr)
		return -ENOMEM;

	if (getname(sock, (struct sockaddr *) addr, &len)) {
		rc = -ENODEV;
		goto free_addr;
	}

	switch (addr->ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
		*port = be16_to_cpu(sin->sin_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, NIP6_FMT, NIP6(sin6->sin6_addr));
		*port = be16_to_cpu(sin6->sin6_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	}
free_addr:
	kfree(addr);
	return rc;
}

1621 1622
/*
 * iscsi_tcp_conn_bind - bind an iscsi connection to a socket
 * @cls_session: class session
 * @cls_conn: class connection
 * @transport_eph: value handed to sockfd_lookup() as the socket fd
 * @is_leading: passed through to iscsi_conn_bind()
 *
 * Returns 0 on success, -EEXIST when the socket lookup fails, or the
 * error from iscsi_tcp_get_addr()/iscsi_conn_bind(); in the latter
 * cases the socket reference is dropped again.
 */
static int
iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
		    struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
		    int is_leading)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
	struct iscsi_host *ihost = shost_priv(shost);
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct socket *sock;
	struct sock *sk;
	int err;

	/* lookup for existing socket */
	sock = sockfd_lookup((int)transport_eph, &err);
	if (sock == NULL) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "sockfd_lookup failed %d\n", err);
		return -EEXIST;
	}

	/*
	 * copy these values now because if we drop the session
	 * userspace may still want to query the values since we will
	 * be using them for the reconnect
	 */
	err = iscsi_tcp_get_addr(conn, sock, conn->portal_address,
				 &conn->portal_port, kernel_getpeername);
	if (!err)
		err = iscsi_tcp_get_addr(conn, sock, ihost->local_address,
					 &ihost->local_port,
					 kernel_getsockname);
	if (!err)
		err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
	if (err) {
		sockfd_put(sock);
		return err;
	}

	/* bind iSCSI connection and socket */
	tcp_conn->sock = sock;

	/* setup Socket parameters */
	sk = sock->sk;
	sk->sk_reuse = 1;
	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
	sk->sk_allocation = GFP_ATOMIC;

	iscsi_conn_set_callbacks(conn);
	tcp_conn->sendpage = tcp_conn->sock->ops->sendpage;

	/* set receive state machine into initial state */
	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

static int
iscsi_r2tpool_alloc(struct iscsi_session *session)
{
	int i;
	int cmd_i;

	/*
1689
	 * initialize per-task: R2T pool and xmit queue
1690 1691
	 */
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
1692 1693
	        struct iscsi_task *task = session->cmds[cmd_i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;
1694 1695 1696 1697 1698 1699 1700 1701

		/*
		 * pre-allocated x4 as much r2ts to handle race when
		 * target acks DataOut faster than we data_xmit() queues
		 * could replenish r2tqueue.
		 */

		/* R2T pool */
1702
		if (iscsi_pool_init(&tcp_task->r2tpool, session->max_r2t * 4, NULL,
1703
				    sizeof(struct iscsi_r2t_info))) {
1704 1705 1706 1707
			goto r2t_alloc_fail;
		}

		/* R2T xmit queue */
1708
		tcp_task->r2tqueue = kfifo_alloc(
1709
		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL);
1710 1711
		if (tcp_task->r2tqueue == ERR_PTR(-ENOMEM)) {
			iscsi_pool_free(&tcp_task->r2tpool);
1712 1713 1714 1715 1716 1717 1718 1719
			goto r2t_alloc_fail;
		}
	}

	return 0;

r2t_alloc_fail:
	for (i = 0; i < cmd_i; i++) {
1720 1721
		struct iscsi_task *task = session->cmds[i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;
1722

1723 1724
		kfifo_free(tcp_task->r2tqueue);
		iscsi_pool_free(&tcp_task->r2tpool);
1725 1726 1727 1728 1729 1730 1731 1732 1733 1734
	}
	return -ENOMEM;
}

/*
 * iscsi_r2tpool_free - free the R2T queue and R2T pool of every task
 * @session: iscsi session
 */
static void
iscsi_r2tpool_free(struct iscsi_session *session)
{
	int cmd_i;

	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
		struct iscsi_tcp_task *tcp_task;

		tcp_task = session->cmds[cmd_i]->dd_data;
		kfifo_free(tcp_task->r2tqueue);
		iscsi_pool_free(&tcp_task->r2tpool);
	}
}

static int
1744
iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
1745
		     char *buf, int buflen)
1746
{
1747
	struct iscsi_conn *conn = cls_conn->dd_data;
1748
	struct iscsi_session *session = conn->session;
1749
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1750
	int value;
1751 1752 1753

	switch(param) {
	case ISCSI_PARAM_HDRDGST_EN:
1754
		iscsi_set_param(cls_conn, param, buf, buflen);
1755 1756
		break;
	case ISCSI_PARAM_DATADGST_EN:
1757
		iscsi_set_param(cls_conn, param, buf, buflen);
1758 1759
		tcp_conn->sendpage = conn->datadgst_en ?
			sock_no_sendpage : tcp_conn->sock->ops->sendpage;
1760 1761
		break;
	case ISCSI_PARAM_MAX_R2T:
1762
		sscanf(buf, "%d", &value);
1763 1764 1765
		if (value <= 0 || !is_power_of_2(value))
			return -EINVAL;
		if (session->max_r2t == value)
1766 1767
			break;
		iscsi_r2tpool_free(session);
1768
		iscsi_set_param(cls_conn, param, buf, buflen);
1769 1770 1771 1772
		if (iscsi_r2tpool_alloc(session))
			return -ENOMEM;
		break;
	default:
1773
		return iscsi_set_param(cls_conn, param, buf, buflen);
1774 1775 1776 1777 1778 1779
	}

	return 0;
}

static int
1780 1781
iscsi_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
			 enum iscsi_param param, char *buf)
1782
{
1783
	struct iscsi_conn *conn = cls_conn->dd_data;
1784
	int len;
1785 1786

	switch(param) {
1787
	case ISCSI_PARAM_CONN_PORT:
1788 1789 1790
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%hu\n", conn->portal_port);
		spin_unlock_bh(&conn->session->lock);
1791
		break;
1792
	case ISCSI_PARAM_CONN_ADDRESS:
1793 1794 1795
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%s\n", conn->portal_address);
		spin_unlock_bh(&conn->session->lock);
1796 1797
		break;
	default:
1798
		return iscsi_conn_get_param(cls_conn, param, buf);
1799 1800 1801 1802 1803
	}

	return len;
}

1804
static void
1805
iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
1806
{
1807
	struct iscsi_conn *conn = cls_conn->dd_data;
1808
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820

	stats->txdata_octets = conn->txdata_octets;
	stats->rxdata_octets = conn->rxdata_octets;
	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
	stats->dataout_pdus = conn->dataout_pdus_cnt;
	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
	stats->datain_pdus = conn->datain_pdus_cnt;
	stats->r2t_pdus = conn->r2t_pdus_cnt;
	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
	stats->custom_length = 3;
	strcpy(stats->custom[0].desc, "tx_sendpage_failures");
1821
	stats->custom[0].value = tcp_conn->sendpage_failures_cnt;
1822
	strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
1823
	stats->custom[1].value = tcp_conn->discontiguous_hdr_cnt;
1824 1825 1826 1827
	strcpy(stats->custom[2].desc, "eh_abort_cnt");
	stats->custom[2].value = conn->eh_abort_cnt;
}

1828
static struct iscsi_cls_session *
1829
iscsi_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
1830 1831
			 uint16_t qdepth, uint32_t initial_cmdsn,
			 uint32_t *hostno)
1832
{
1833 1834
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
1835
	struct Scsi_Host *shost;
1836
	int cmd_i;
1837

1838 1839
	if (ep) {
		printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);
1840 1841 1842
		return NULL;
	}

1843
	shost = iscsi_host_alloc(&iscsi_sht, 0, qdepth);
1844
	if (!shost)
1845
		return NULL;
1846 1847 1848 1849
	shost->transportt = iscsi_tcp_scsi_transport;
	shost->max_lun = iscsi_max_lun;
	shost->max_id = 0;
	shost->max_channel = 0;
1850
	shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;
1851

1852
	if (iscsi_host_add(shost, NULL))
1853 1854 1855 1856
		goto free_host;
	*hostno = shost->host_no;

	cls_session = iscsi_session_setup(&iscsi_tcp_transport, shost, cmds_max,
1857
					  sizeof(struct iscsi_tcp_task),
1858
					  initial_cmdsn, 0);
1859 1860 1861
	if (!cls_session)
		goto remove_host;
	session = cls_session->dd_data;
1862

1863
	shost->can_queue = session->scsi_cmds_max;
1864
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
1865 1866
		struct iscsi_task *task = session->cmds[cmd_i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;
1867

1868 1869
		task->hdr = &tcp_task->hdr.cmd_hdr;
		task->hdr_max = sizeof(tcp_task->hdr) - ISCSI_DIGEST_SIZE;
1870 1871
	}

1872 1873
	if (iscsi_r2tpool_alloc(session))
		goto remove_session;
1874 1875
	return cls_session;

1876
remove_session:
1877
	iscsi_session_teardown(cls_session);
1878
remove_host:
1879
	iscsi_host_remove(shost);
1880
free_host:
1881
	iscsi_host_free(shost);
1882 1883 1884 1885 1886
	return NULL;
}

/*
 * iscsi_tcp_session_destroy - free R2T pools, session and host
 * @cls_session: class session to destroy
 */
static void iscsi_tcp_session_destroy(struct iscsi_cls_session *cls_session)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
	struct iscsi_session *session = cls_session->dd_data;

	/* session first ... */
	iscsi_r2tpool_free(session);
	iscsi_session_teardown(cls_session);

	/* ... then the host it was created on */
	iscsi_host_remove(shost);
	iscsi_host_free(shost);
}

1896 1897
static int iscsi_tcp_slave_configure(struct scsi_device *sdev)
{
1898
	blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_ANY);
1899 1900 1901 1902
	blk_queue_dma_alignment(sdev->request_queue, 0);
	return 0;
}

1903
static struct scsi_host_template iscsi_sht = {
1904
	.module			= THIS_MODULE,
1905
	.name			= "iSCSI Initiator over TCP/IP",
1906 1907
	.queuecommand           = iscsi_queuecommand,
	.change_queue_depth	= iscsi_change_queue_depth,
1908
	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
1909
	.sg_tablesize		= 4096,
1910
	.max_sectors		= 0xFFFF,
1911 1912
	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
	.eh_abort_handler       = iscsi_eh_abort,
1913
	.eh_device_reset_handler= iscsi_eh_device_reset,
1914
	.eh_target_reset_handler= iscsi_eh_target_reset,
1915
	.use_clustering         = DISABLE_CLUSTERING,
1916
	.slave_configure        = iscsi_tcp_slave_configure,
1917 1918 1919 1920
	.proc_name		= "iscsi_tcp",
	.this_id		= -1,
};

1921 1922 1923 1924 1925
static struct iscsi_transport iscsi_tcp_transport = {
	.owner			= THIS_MODULE,
	.name			= "tcp",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
				  | CAP_DATADGST,
1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938
	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
				  ISCSI_MAX_XMIT_DLENGTH |
				  ISCSI_HDRDGST_EN |
				  ISCSI_DATADGST_EN |
				  ISCSI_INITIAL_R2T_EN |
				  ISCSI_MAX_R2T |
				  ISCSI_IMM_DATA_EN |
				  ISCSI_FIRST_BURST |
				  ISCSI_MAX_BURST |
				  ISCSI_PDU_INORDER_EN |
				  ISCSI_DATASEQ_INORDER_EN |
				  ISCSI_ERL |
				  ISCSI_CONN_PORT |
1939
				  ISCSI_CONN_ADDRESS |
1940 1941 1942
				  ISCSI_EXP_STATSN |
				  ISCSI_PERSISTENT_PORT |
				  ISCSI_PERSISTENT_ADDRESS |
1943 1944
				  ISCSI_TARGET_NAME | ISCSI_TPGT |
				  ISCSI_USERNAME | ISCSI_PASSWORD |
1945
				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
1946 1947
				  ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
				  ISCSI_LU_RESET_TMO |
1948 1949
				  ISCSI_PING_TMO | ISCSI_RECV_TMO |
				  ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
1950
	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
1951 1952
				  ISCSI_HOST_INITIATOR_NAME |
				  ISCSI_HOST_NETDEV_NAME,
1953 1954 1955 1956 1957 1958 1959
	/* session management */
	.create_session		= iscsi_tcp_session_create,
	.destroy_session	= iscsi_tcp_session_destroy,
	/* connection management */
	.create_conn		= iscsi_tcp_conn_create,
	.bind_conn		= iscsi_tcp_conn_bind,
	.destroy_conn		= iscsi_tcp_conn_destroy,
1960
	.set_param		= iscsi_conn_set_param,
1961
	.get_conn_param		= iscsi_tcp_conn_get_param,
1962
	.get_session_param	= iscsi_session_get_param,
1963
	.start_conn		= iscsi_conn_start,
1964
	.stop_conn		= iscsi_tcp_conn_stop,
1965
	/* iscsi host params */
1966
	.get_host_param		= iscsi_host_get_param,
1967
	.set_host_param		= iscsi_host_set_param,
1968
	/* IO */
1969 1970
	.send_pdu		= iscsi_conn_send_pdu,
	.get_stats		= iscsi_conn_get_stats,
1971 1972 1973
	.init_task		= iscsi_tcp_task_init,
	.xmit_task		= iscsi_tcp_task_xmit,
	.cleanup_task		= iscsi_tcp_cleanup_task,
1974
	/* recovery */
M
Mike Christie 已提交
1975
	.session_recovery_timedout = iscsi_session_recovery_timedout,
1976 1977 1978 1979 1980 1981
};

/*
 * Module load: reject an iscsi_max_lun below 1 (-EINVAL), then register
 * the transport (-ENODEV if that fails).
 */
static int __init
iscsi_tcp_init(void)
{
	if (iscsi_max_lun < 1) {
		printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
		       iscsi_max_lun);
		return -EINVAL;
	}

	iscsi_tcp_scsi_transport =
		iscsi_register_transport(&iscsi_tcp_transport);

	return iscsi_tcp_scsi_transport ? 0 : -ENODEV;
}

/* Module unload: unregister the transport again. */
static void __exit iscsi_tcp_exit(void)
{
	iscsi_unregister_transport(&iscsi_tcp_transport);
}

/* Hook the load and unload handlers defined above into the module. */
module_init(iscsi_tcp_init);
module_exit(iscsi_tcp_exit);