/*
 * iSCSI Initiator over TCP/IP Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
 * Copyright (C) 2005 - 2006 Mike Christie
 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
 * maintained by open-iscsi@googlegroups.com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * See the file COPYING included with this distribution for more details.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 */

#include <linux/types.h>
#include <linux/list.h>
#include <linux/inet.h>
#include <linux/file.h>
#include <linux/blkdev.h>
#include <linux/crypto.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
#include <net/tcp.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include "iscsi_tcp.h"

MODULE_AUTHOR("Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");
MODULE_DESCRIPTION("iSCSI/TCP data-path");
MODULE_LICENSE("GPL");
#undef DEBUG_TCP
#define DEBUG_ASSERT

#ifdef DEBUG_TCP
#define debug_tcp(fmt...) printk(KERN_INFO "tcp: " fmt)
#else
#define debug_tcp(fmt...)
#endif

#ifndef DEBUG_ASSERT
#ifdef BUG_ON
#undef BUG_ON
#endif
#define BUG_ON(expr)
#endif

static struct scsi_transport_template *iscsi_tcp_scsi_transport;
static struct scsi_host_template iscsi_sht;
static struct iscsi_transport iscsi_tcp_transport;

static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

static int iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
				   struct iscsi_segment *segment);

/*
 * Scatterlist handling: inside the iscsi_segment, we
 * remember an index into the scatterlist, and set data/size
 * to the current scatterlist entry. For highmem pages, we
 * kmap as needed.
 *
 * Note that the page is unmapped when we return from
 * TCP's data_ready handler, so we may end up mapping and
 * unmapping the same page repeatedly. The whole reason
 * for this is that we shouldn't keep the page mapped
 * outside the softirq.
 */

/**
 * iscsi_tcp_segment_init_sg - init indicated scatterlist entry
 * @segment: the buffer object
 * @sg: scatterlist
 * @offset: byte offset into that sg entry
 *
 * This function sets up the segment so that subsequent
 * data is copied to the indicated sg entry, at the given
 * offset.
 */
static inline void
iscsi_tcp_segment_init_sg(struct iscsi_segment *segment,
			  struct scatterlist *sg, unsigned int offset)
{
	segment->sg = sg;
	segment->sg_offset = offset;
	segment->size = min(sg->length - offset,
			    segment->total_size - segment->total_copied);
	segment->data = NULL;
}

/**
 * iscsi_tcp_segment_map - map the current S/G page
 * @segment: iscsi_segment
 * @recv: 1 if called from recv path
 *
 * We only need to possibly kmap data if scatter lists are being used,
 * because the iscsi passthrough and internal IO paths will never use high
 * mem pages.
 */
static inline void
iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv)
{
	struct scatterlist *sg;

	if (segment->data != NULL || !segment->sg)
		return;

	sg = segment->sg;
	BUG_ON(segment->sg_mapped);
	BUG_ON(sg->length == 0);

	/*
	 * If the page count is greater than one it is ok to send
	 * to the network layer's zero copy send path. If not we
	 * have to go the slow sendmsg path. We always map for the
	 * recv path.
	 */
	if (page_count(sg_page(sg)) >= 1 && !recv)
		return;

	debug_tcp("iscsi_tcp_segment_map %s %p\n", recv ? "recv" : "xmit",
		  segment);
	segment->sg_mapped = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
	segment->data = segment->sg_mapped + sg->offset + segment->sg_offset;
}

static inline void
iscsi_tcp_segment_unmap(struct iscsi_segment *segment)
{
	debug_tcp("iscsi_tcp_segment_unmap %p\n", segment);

	if (segment->sg_mapped) {
		debug_tcp("iscsi_tcp_segment_unmap valid\n");
		kunmap_atomic(segment->sg_mapped, KM_SOFTIRQ0);
		segment->sg_mapped = NULL;
		segment->data = NULL;
	}
}

/*
 * Splice the digest buffer into the buffer
 */
static inline void
iscsi_tcp_segment_splice_digest(struct iscsi_segment *segment, void *digest)
{
	segment->data = digest;
	segment->digest_len = ISCSI_DIGEST_SIZE;
	segment->total_size += ISCSI_DIGEST_SIZE;
	segment->size = ISCSI_DIGEST_SIZE;
	segment->copied = 0;
	segment->sg = NULL;
	segment->hash = NULL;
}

/**
 * iscsi_tcp_segment_done - check whether the segment is complete
 * @segment: iscsi segment to check
 * @recv: set to one if this is called from the recv path
 * @copied: number of bytes copied
 *
 * Check if we're done receiving this segment. If the receive
 * buffer is full but we expect more data, move on to the
 * next entry in the scatterlist.
 *
 * If the amount of data we received isn't a multiple of 4,
 * we will transparently receive the pad bytes, too.
 *
 * This function must be re-entrant.
 */
static inline int
iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv, unsigned copied)
{
	static unsigned char padbuf[ISCSI_PAD_LEN];
	struct scatterlist sg;
	unsigned int pad;

	debug_tcp("copied %u %u size %u %s\n", segment->copied, copied,
		  segment->size, recv ? "recv" : "xmit");
	if (segment->hash && copied) {
		/*
		 * If a segment is kmap'd we must unmap it before sending
		 * to the crypto layer since that will try to kmap it again.
		 */
		iscsi_tcp_segment_unmap(segment);

		if (!segment->data) {
			sg_init_table(&sg, 1);
			sg_set_page(&sg, sg_page(segment->sg), copied,
				    segment->copied + segment->sg_offset +
							segment->sg->offset);
		} else
			sg_init_one(&sg, segment->data + segment->copied,
				    copied);
		crypto_hash_update(segment->hash, &sg, copied);
	}

	segment->copied += copied;
	if (segment->copied < segment->size) {
		iscsi_tcp_segment_map(segment, recv);
		return 0;
	}

	segment->total_copied += segment->copied;
	segment->copied = 0;
	segment->size = 0;

	/* Unmap the current scatterlist page, if there is one. */
	iscsi_tcp_segment_unmap(segment);

	/* Do we have more scatterlist entries? */
	debug_tcp("total copied %u total size %u\n", segment->total_copied,
		   segment->total_size);
	if (segment->total_copied < segment->total_size) {
		/* Proceed to the next entry in the scatterlist. */
		iscsi_tcp_segment_init_sg(segment, sg_next(segment->sg),
					  0);
		iscsi_tcp_segment_map(segment, recv);
		BUG_ON(segment->size == 0);
		return 0;
	}

	/* Do we need to handle padding? */
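	/* iSCSI segments are padded out to a 4-byte boundary: e.g. a
	 * 13-byte data segment is followed by 3 pad bytes on the wire,
	 * and iscsi_padding() tells us how many such bytes to expect. */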
	pad = iscsi_padding(segment->total_copied);
	if (pad != 0) {
		debug_tcp("consume %d pad bytes\n", pad);
		segment->total_size += pad;
		segment->size = pad;
		segment->data = padbuf;
		return 0;
	}

	/*
	 * Set us up for transferring the data digest. hdr digest
	 * is completely handled in hdr done function.
	 */
	if (segment->hash) {
		crypto_hash_final(segment->hash, segment->digest);
		iscsi_tcp_segment_splice_digest(segment,
				 recv ? segment->recv_digest : segment->digest);
		return 0;
	}

	return 1;
}

/**
 * iscsi_tcp_xmit_segment - transmit segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to transmit
 *
 * This function transmits as much of the buffer as
 * the network layer will accept, and returns the number of
 * bytes transmitted.
 *
 * If CRC hashing is enabled, the function will compute the
 * hash as it goes. When the entire segment has been transmitted,
 * it will retrieve the hash value and send it as well.
 */
static int
iscsi_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment)
{
	struct socket *sk = tcp_conn->sock;
	unsigned int copied = 0;
	int r = 0;

	while (!iscsi_tcp_segment_done(segment, 0, r)) {
		struct scatterlist *sg;
		unsigned int offset, copy;
		int flags = 0;

		r = 0;
		offset = segment->copied;
		copy = segment->size - offset;

		if (segment->total_copied + segment->size < segment->total_size)
			flags |= MSG_MORE;

		/* Use sendpage if we can; else fall back to sendmsg */
		if (!segment->data) {
			sg = segment->sg;
			offset += segment->sg_offset + sg->offset;
			r = tcp_conn->sendpage(sk, sg_page(sg), offset, copy,
					       flags);
		} else {
			struct msghdr msg = { .msg_flags = flags };
			struct kvec iov = {
				.iov_base = segment->data + offset,
				.iov_len = copy
			};

			r = kernel_sendmsg(sk, &msg, &iov, 1, copy);
		}

		if (r < 0) {
			iscsi_tcp_segment_unmap(segment);
			if (copied || r == -EAGAIN)
				break;
			return r;
		}
		copied += r;
	}
	return copied;
}

/**
 * iscsi_tcp_segment_recv - copy data to segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to copy to
 * @ptr: data pointer
 * @len: amount of data available
 *
 * This function copies up to @len bytes to the
 * given buffer, and returns the number of bytes
 * consumed, which can actually be less than @len.
 *
 * If hash digest is enabled, the function will update the
 * hash while copying.
 * Combining these two operations doesn't buy us a lot (yet),
 * but in the future we could implement combined copy+crc,
 * just the way we do it for network layer checksums.
 */
static int
iscsi_tcp_segment_recv(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment, const void *ptr,
		       unsigned int len)
{
	unsigned int copy = 0, copied = 0;

	while (!iscsi_tcp_segment_done(segment, 1, copy)) {
		if (copied == len) {
			debug_tcp("iscsi_tcp_segment_recv copied %d bytes\n",
				  len);
			break;
		}

		copy = min(len - copied, segment->size - segment->copied);
		debug_tcp("iscsi_tcp_segment_recv copying %d\n", copy);
		memcpy(segment->data + segment->copied, ptr + copied, copy);
		copied += copy;
	}
	return copied;
}

static inline void
iscsi_tcp_dgst_header(struct hash_desc *hash, const void *hdr, size_t hdrlen,
		      unsigned char digest[ISCSI_DIGEST_SIZE])
{
	struct scatterlist sg;

	sg_init_one(&sg, hdr, hdrlen);
	crypto_hash_digest(hash, &sg, hdrlen, digest);
}

static inline int
iscsi_tcp_dgst_verify(struct iscsi_tcp_conn *tcp_conn,
		      struct iscsi_segment *segment)
{
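	/* A digest_len of zero means no digest was spliced into this
	 * segment, so there is nothing to compare. */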
	if (!segment->digest_len)
		return 1;

	if (memcmp(segment->recv_digest, segment->digest,
		   segment->digest_len)) {
		debug_scsi("digest mismatch\n");
		return 0;
	}

	return 1;
}

/*
 * Helper function to set up segment buffer
 */
static inline void
__iscsi_segment_init(struct iscsi_segment *segment, size_t size,
		     iscsi_segment_done_fn_t *done, struct hash_desc *hash)
{
	memset(segment, 0, sizeof(*segment));
	segment->total_size = size;
	segment->done = done;

	if (hash) {
		segment->hash = hash;
		crypto_hash_init(hash);
	}
}

static inline void
iscsi_segment_init_linear(struct iscsi_segment *segment, void *data,
			  size_t size, iscsi_segment_done_fn_t *done,
			  struct hash_desc *hash)
{
	__iscsi_segment_init(segment, size, done, hash);
	segment->data = data;
	segment->size = size;
}

static inline int
iscsi_segment_seek_sg(struct iscsi_segment *segment,
		      struct scatterlist *sg_list, unsigned int sg_count,
		      unsigned int offset, size_t size,
		      iscsi_segment_done_fn_t *done, struct hash_desc *hash)
{
	struct scatterlist *sg;
	unsigned int i;

	debug_scsi("iscsi_segment_seek_sg offset %u size %llu\n",
		  offset, size);
	__iscsi_segment_init(segment, size, done, hash);
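	/* Walk the scatterlist until we reach the entry that contains
	 * @offset, then point the segment at that entry, at the
	 * remaining offset within it. */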
	for_each_sg(sg_list, sg, sg_count, i) {
		debug_scsi("sg %d, len %u offset %u\n", i, sg->length,
			   sg->offset);
		if (offset < sg->length) {
			iscsi_tcp_segment_init_sg(segment, sg, offset);
			return 0;
		}
		offset -= sg->length;
	}

	return ISCSI_ERR_DATA_OFFSET;
}

/**
 * iscsi_tcp_hdr_recv_prep - prep segment for hdr reception
 * @tcp_conn: iscsi connection to prep for
 *
 * This function always passes NULL for the hash argument, because when this
 * function is called we do not yet know the final size of the header and want
 * to delay the digest processing until we know that.
 */
static void
iscsi_tcp_hdr_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	debug_tcp("iscsi_tcp_hdr_recv_prep(%p%s)\n", tcp_conn,
		  tcp_conn->iscsi_conn->hdrdgst_en ? ", digest enabled" : "");
	iscsi_segment_init_linear(&tcp_conn->in.segment,
				tcp_conn->in.hdr_buf, sizeof(struct iscsi_hdr),
				iscsi_tcp_hdr_recv_done, NULL);
}

/*
 * Handle incoming reply to any other type of command
 */
static int
iscsi_tcp_data_recv_done(struct iscsi_tcp_conn *tcp_conn,
			 struct iscsi_segment *segment)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	int rc = 0;

	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
		return ISCSI_ERR_DATA_DGST;

	rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr,
			conn->data, tcp_conn->in.datalen);
	if (rc)
		return rc;

	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

static void
iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct hash_desc *rx_hash = NULL;

	if (conn->datadgst_en &&
	    !(conn->session->tt->caps & CAP_DIGEST_OFFLOAD))
		rx_hash = &tcp_conn->rx_hash;

	iscsi_segment_init_linear(&tcp_conn->in.segment,
				conn->data, tcp_conn->in.datalen,
				iscsi_tcp_data_recv_done, rx_hash);
}

/*
 * must be called with session lock
 */
static void
iscsi_tcp_cleanup_task(struct iscsi_conn *conn, struct iscsi_task *task)
{
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct iscsi_r2t_info *r2t;

	/* nothing to do for mgmt tasks */
	if (!task->sc)
		return;

	/* flush task's r2t queues */
	while (__kfifo_get(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		debug_scsi("iscsi_tcp_cleanup_task pending r2t dropped\n");
	}

	r2t = tcp_task->r2t;
	if (r2t != NULL) {
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		tcp_task->r2t = NULL;
	}
}

/**
 * iscsi_data_in - SCSI Data-In Response processing
 * @conn: iscsi connection
 * @task: scsi command task
 **/
static int
iscsi_data_in(struct iscsi_conn *conn, struct iscsi_task *task)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
	int datasn = be32_to_cpu(rhdr->datasn);
	unsigned total_in_length = scsi_in(task->sc)->length;

	iscsi_update_cmdsn(conn->session, (struct iscsi_nopin*)rhdr);
	if (tcp_conn->in.datalen == 0)
		return 0;

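	/* Data-In PDUs for a task must arrive in order: each one has to
	 * carry the DataSN we expect next. */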
	if (tcp_task->exp_datasn != datasn) {
		debug_tcp("%s: task->exp_datasn(%d) != rhdr->datasn(%d)\n",
		          __func__, tcp_task->exp_datasn, datasn);
		return ISCSI_ERR_DATASN;
	}

	tcp_task->exp_datasn++;

	tcp_task->data_offset = be32_to_cpu(rhdr->offset);
	if (tcp_task->data_offset + tcp_conn->in.datalen > total_in_length) {
		debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n",
		          __func__, tcp_task->data_offset,
		          tcp_conn->in.datalen, total_in_length);
		return ISCSI_ERR_DATA_OFFSET;
	}

	conn->datain_pdus_cnt++;
	return 0;
}

/**
 * iscsi_solicit_data_init - initialize first Data-Out
 * @conn: iscsi connection
 * @task: scsi command task
 * @r2t: R2T info
 *
 * Notes:
 *	Initialize first Data-Out within this R2T sequence and finds
 *	proper data_offset within this SCSI command.
 *
 *	This function is called with connection lock taken.
 **/
static void
iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_task *task,
			struct iscsi_r2t_info *r2t)
{
	struct iscsi_data *hdr;

	hdr = &r2t->dtask.hdr;
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
	memcpy(hdr->lun, task->hdr->lun, sizeof(hdr->lun));
	hdr->itt = task->hdr->itt;
	hdr->exp_statsn = r2t->exp_statsn;
	hdr->offset = cpu_to_be32(r2t->data_offset);
	if (r2t->data_length > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
		hdr->flags = 0;
	} else {
		hton24(hdr->dlength, r2t->data_length);
		r2t->data_count = r2t->data_length;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}
	conn->dataout_pdus_cnt++;

	r2t->sent = 0;
}

/**
 * iscsi_r2t_rsp - iSCSI R2T Response processing
 * @conn: iscsi connection
 * @task: scsi command task
 **/
static int
iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
{
	struct iscsi_r2t_info *r2t;
	struct iscsi_session *session = conn->session;
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
	int r2tsn = be32_to_cpu(rhdr->r2tsn);
	int rc;

	if (tcp_conn->in.datalen) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2t with datalen %d\n",
				  tcp_conn->in.datalen);
		return ISCSI_ERR_DATALEN;
	}

	if (tcp_task->exp_datasn != r2tsn){
		debug_tcp("%s: task->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
		          __func__, tcp_task->exp_datasn, r2tsn);
		return ISCSI_ERR_R2TSN;
	}

	/* fill-in new R2T associated with the task */
	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);

	if (!task->sc || session->state != ISCSI_STATE_LOGGED_IN) {
		iscsi_conn_printk(KERN_INFO, conn,
				  "dropping R2T itt %d in recovery.\n",
				  task->itt);
		return 0;
	}

	rc = __kfifo_get(tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
	BUG_ON(!rc);

	r2t->exp_statsn = rhdr->statsn;
	r2t->data_length = be32_to_cpu(rhdr->data_length);
	if (r2t->data_length == 0) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with zero data len\n");
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		return ISCSI_ERR_DATALEN;
	}

	if (r2t->data_length > session->max_burst)
		debug_scsi("invalid R2T with data len %u and max burst %u. "
			   "Attempting to execute request.\n",
			    r2t->data_length, session->max_burst);

	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
	if (r2t->data_offset + r2t->data_length > scsi_out(task->sc)->length) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "invalid R2T with data len %u at offset %u "
				  "and total length %d\n", r2t->data_length,
				  r2t->data_offset, scsi_out(task->sc)->length);
		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		return ISCSI_ERR_DATALEN;
	}

	r2t->ttt = rhdr->ttt; /* no flip */
	r2t->solicit_datasn = 0;

	iscsi_solicit_data_init(conn, task, r2t);

	tcp_task->exp_datasn = r2tsn + 1;
	__kfifo_put(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
	conn->r2t_pdus_cnt++;

	iscsi_requeue_task(task);
	return 0;
}

/*
 * Handle incoming reply to DataIn command
 */
static int
iscsi_tcp_process_data_in(struct iscsi_tcp_conn *tcp_conn,
			  struct iscsi_segment *segment)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr = tcp_conn->in.hdr;
	int rc;

	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
		return ISCSI_ERR_DATA_DGST;

	/* check for non-exceptional status */
	if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
		rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
		if (rc)
			return rc;
	}

	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

/**
 * iscsi_tcp_hdr_dissect - process PDU header
 * @conn: iSCSI connection
 * @hdr: PDU header
 *
 * This function analyzes the header of the PDU received,
 * and performs several sanity checks. If the PDU is accompanied
 * by data, the receive buffer is set up to copy the incoming data
 * to the correct location.
 */
static int
iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
{
	int rc = 0, opcode, ahslen;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_task *task;

	/* verify PDU length */
	tcp_conn->in.datalen = ntoh24(hdr->dlength);
	if (tcp_conn->in.datalen > conn->max_recv_dlength) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "iscsi_tcp: datalen %d > %d\n",
				  tcp_conn->in.datalen, conn->max_recv_dlength);
		return ISCSI_ERR_DATALEN;
	}

	/* Additional header segments. So far, we don't
	 * process additional headers.
	 */
	ahslen = hdr->hlength << 2;

	opcode = hdr->opcode & ISCSI_OPCODE_MASK;
	/* verify itt (itt encoding: age+cid+itt) */
	rc = iscsi_verify_itt(conn, hdr->itt);
	if (rc)
		return rc;

	debug_tcp("opcode 0x%x ahslen %d datalen %d\n",
		  opcode, ahslen, tcp_conn->in.datalen);

	switch(opcode) {
	case ISCSI_OP_SCSI_DATA_IN:
		spin_lock(&conn->session->lock);
		task = iscsi_itt_to_ctask(conn, hdr->itt);
		if (!task)
			rc = ISCSI_ERR_BAD_ITT;
		else
			rc = iscsi_data_in(conn, task);
		if (rc) {
			spin_unlock(&conn->session->lock);
			break;
		}

		if (tcp_conn->in.datalen) {
			struct iscsi_tcp_task *tcp_task = task->dd_data;
			struct hash_desc *rx_hash = NULL;
			struct scsi_data_buffer *sdb = scsi_in(task->sc);

			/*
			 * Setup copy of Data-In into the Scsi_Cmnd
			 * Scatterlist case:
			 * We set up the iscsi_segment to point to the next
			 * scatterlist entry to copy to. As we go along,
			 * we move on to the next scatterlist entry and
			 * update the digest per-entry.
			 */
			if (conn->datadgst_en &&
			    !(conn->session->tt->caps & CAP_DIGEST_OFFLOAD))
				rx_hash = &tcp_conn->rx_hash;

			debug_tcp("iscsi_tcp_begin_data_in(%p, offset=%d, "
				  "datalen=%d)\n", tcp_conn,
				  tcp_task->data_offset,
				  tcp_conn->in.datalen);
			rc = iscsi_segment_seek_sg(&tcp_conn->in.segment,
						   sdb->table.sgl,
						   sdb->table.nents,
						   tcp_task->data_offset,
						   tcp_conn->in.datalen,
						   iscsi_tcp_process_data_in,
						   rx_hash);
			spin_unlock(&conn->session->lock);
			return rc;
		}
		rc = __iscsi_complete_pdu(conn, hdr, NULL, 0);
		spin_unlock(&conn->session->lock);
		break;
	case ISCSI_OP_SCSI_CMD_RSP:
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
		break;
	case ISCSI_OP_R2T:
		spin_lock(&conn->session->lock);
		task = iscsi_itt_to_ctask(conn, hdr->itt);
		if (!task)
			rc = ISCSI_ERR_BAD_ITT;
		else if (ahslen)
			rc = ISCSI_ERR_AHSLEN;
		else if (task->sc->sc_data_direction == DMA_TO_DEVICE)
			rc = iscsi_r2t_rsp(conn, task);
		else
			rc = ISCSI_ERR_PROTO;
		spin_unlock(&conn->session->lock);
		break;
	case ISCSI_OP_LOGIN_RSP:
	case ISCSI_OP_TEXT_RSP:
	case ISCSI_OP_REJECT:
	case ISCSI_OP_ASYNC_EVENT:
		/*
		 * It is possible that we could get a PDU with a buffer larger
		 * than 8K, but there are no targets that currently do this.
		 * For now we fail until we find a vendor that needs it
		 */
		if (ISCSI_DEF_MAX_RECV_SEG_LEN < tcp_conn->in.datalen) {
			iscsi_conn_printk(KERN_ERR, conn,
					  "iscsi_tcp: received buffer of "
					  "len %u but conn buffer is only %u "
					  "(opcode %0x)\n",
					  tcp_conn->in.datalen,
					  ISCSI_DEF_MAX_RECV_SEG_LEN, opcode);
			rc = ISCSI_ERR_PROTO;
			break;
		}

		/* If there's data coming in with the response,
		 * receive it to the connection's buffer.
		 */
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
	/* fall through */
	case ISCSI_OP_LOGOUT_RSP:
	case ISCSI_OP_NOOP_IN:
	case ISCSI_OP_SCSI_TMFUNC_RSP:
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
		break;
	default:
		rc = ISCSI_ERR_BAD_OPCODE;
		break;
	}

	if (rc == 0) {
		/* Anything that comes with data should have
		 * been handled above. */
		if (tcp_conn->in.datalen)
			return ISCSI_ERR_PROTO;
		iscsi_tcp_hdr_recv_prep(tcp_conn);
	}

	return rc;
}

/**
 * iscsi_tcp_hdr_recv_done - process PDU header
 *
 * This is the callback invoked when the PDU header has
 * been received. If the header is followed by additional
 * header segments, we go back for more data.
 */
static int
iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
			struct iscsi_segment *segment)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr;

	/* Check if there are additional header segments
	 * *prior* to computing the digest, because we
	 * may need to go back to the caller for more.
	 */
	hdr = (struct iscsi_hdr *) tcp_conn->in.hdr_buf;
	if (segment->copied == sizeof(struct iscsi_hdr) && hdr->hlength) {
		/* Bump the header length - the caller will
		 * just loop around and get the AHS for us, and
		 * call again. */
		unsigned int ahslen = hdr->hlength << 2;

		/* Make sure we don't overflow */
		if (sizeof(*hdr) + ahslen > sizeof(tcp_conn->in.hdr_buf))
			return ISCSI_ERR_AHSLEN;

		segment->total_size += ahslen;
		segment->size += ahslen;
		return 0;
	}

	/* We're done processing the header. See if we're doing
	 * header digests; if so, set up the recv_digest buffer
	 * and go back for more. */
	if (conn->hdrdgst_en) {
		if (segment->digest_len == 0) {
			/*
			 * Even if we offload the digest processing we
			 * splice it in so we can increment the skb/segment
			 * counters in preparation for the data segment.
			 */
			iscsi_tcp_segment_splice_digest(segment,
							segment->recv_digest);
			return 0;
		}

		if (!(conn->session->tt->caps & CAP_DIGEST_OFFLOAD)) {
			iscsi_tcp_dgst_header(&tcp_conn->rx_hash, hdr,
				segment->total_copied - ISCSI_DIGEST_SIZE,
				segment->digest);

			if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
				return ISCSI_ERR_HDR_DGST;
		}
	}

	tcp_conn->in.hdr = hdr;
	return iscsi_tcp_hdr_dissect(conn, hdr);
}

inline int iscsi_tcp_recv_segment_is_hdr(struct iscsi_tcp_conn *tcp_conn)
{
	return tcp_conn->in.segment.done == iscsi_tcp_hdr_recv_done;
}

enum {
	ISCSI_TCP_SEGMENT_DONE,		/* curr seg has been processed */
	ISCSI_TCP_SKB_DONE,		/* skb is out of data */
	ISCSI_TCP_CONN_ERR,		/* iscsi layer has fired a conn err */
	ISCSI_TCP_SUSPENDED,		/* conn is suspended */
};

/**
 * iscsi_tcp_recv_skb - Process skb
 * @conn: iscsi connection
 * @skb: network buffer with header and/or data segment
 * @offset: offset in skb
 * @offloaded: bool indicating if the data transfer was offloaded
 * @status: set to one of the ISCSI_TCP_* status values on return
 */
int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb,
		       unsigned int offset, bool offloaded, int *status)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->in.segment;
	struct skb_seq_state seq;
	unsigned int consumed = 0;
	int rc = 0;

	debug_tcp("in %d bytes\n", skb->len - offset);

	if (unlikely(conn->suspend_rx)) {
		debug_tcp("conn %d Rx suspended!\n", conn->id);
		*status = ISCSI_TCP_SUSPENDED;
		return 0;
	}

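	/* If the transfer was offloaded, the data has already been placed
	 * for us; there is nothing to copy out of the skb, so just mark
	 * the segment as complete. */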
	if (offloaded) {
		segment->total_copied = segment->total_size;
		goto segment_done;
	}

	skb_prepare_seq_read(skb, offset, skb->len, &seq);
	while (1) {
		unsigned int avail;
		const u8 *ptr;

		avail = skb_seq_read(consumed, &ptr, &seq);
		if (avail == 0) {
			debug_tcp("no more data avail. Consumed %d\n",
				  consumed);
			*status = ISCSI_TCP_SKB_DONE;
			skb_abort_seq_read(&seq);
			goto skb_done;
		}
		BUG_ON(segment->copied >= segment->size);

		debug_tcp("skb %p ptr=%p avail=%u\n", skb, ptr, avail);
		rc = iscsi_tcp_segment_recv(tcp_conn, segment, ptr, avail);
		BUG_ON(rc == 0);
		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			skb_abort_seq_read(&seq);
			goto segment_done;
		}
	}

segment_done:
	*status = ISCSI_TCP_SEGMENT_DONE;
	debug_tcp("segment done\n");
	rc = segment->done(tcp_conn, segment);
	if (rc != 0) {
		*status = ISCSI_TCP_CONN_ERR;
		debug_tcp("Error receiving PDU, errno=%d\n", rc);
		iscsi_conn_failure(conn, rc);
		return 0;
	}
	/* The done() functions sets up the next segment. */

skb_done:
	conn->rxdata_octets += consumed;
	return consumed;
}
EXPORT_SYMBOL_GPL(iscsi_tcp_recv_skb);

/**
 * iscsi_tcp_recv - TCP receive in sendfile fashion
 * @rd_desc: read descriptor
 * @skb: socket buffer
 * @offset: offset in skb
 * @len: skb->len - offset
 **/
static int
iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
	       unsigned int offset, size_t len)
{
	struct iscsi_conn *conn = rd_desc->arg.data;
	unsigned int consumed, total_consumed = 0;
	int status;

	debug_tcp("in %d bytes\n", skb->len - offset);
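	/* A single skb may carry several PDUs, or only part of one; keep
	 * feeding it to iscsi_tcp_recv_skb() until the skb is drained. */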

	do {
		status = 0;
		consumed = iscsi_tcp_recv_skb(conn, skb, offset, 0, &status);
		offset += consumed;
		total_consumed += consumed;
	} while (consumed != 0 && status != ISCSI_TCP_SKB_DONE);

	debug_tcp("read %d bytes status %d\n", skb->len - offset, status);
	return total_consumed;
}

static void
iscsi_tcp_data_ready(struct sock *sk, int flag)
{
	struct iscsi_conn *conn = sk->sk_user_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	read_descriptor_t rd_desc;

	read_lock(&sk->sk_callback_lock);

	/*
	 * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
	 * We set count to 1 because we want the network layer to
	 * hand us all the skbs that are available. iscsi_tcp_recv
	 * handles pdus that cross buffers or pdus that still need data.
	 */
	rd_desc.arg.data = conn;
	rd_desc.count = 1;
	tcp_read_sock(sk, &rd_desc, iscsi_tcp_recv);

	read_unlock(&sk->sk_callback_lock);

	/* If we had to (atomically) map a highmem page,
	 * unmap it now. */
	iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
}

static void
iscsi_tcp_state_change(struct sock *sk)
{
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_conn *conn;
	struct iscsi_session *session;
	void (*old_state_change)(struct sock *);

	read_lock(&sk->sk_callback_lock);

	conn = (struct iscsi_conn*)sk->sk_user_data;
	session = conn->session;

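	/* A socket dropping into CLOSE/CLOSE_WAIT with no receive data
	 * pending means the peer closed the connection; report it as a
	 * connection failure so recovery can take over. */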
	if ((sk->sk_state == TCP_CLOSE_WAIT ||
	     sk->sk_state == TCP_CLOSE) &&
	    !atomic_read(&sk->sk_rmem_alloc)) {
		debug_tcp("iscsi_tcp_state_change: TCP_CLOSE|TCP_CLOSE_WAIT\n");
		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	}

	tcp_conn = conn->dd_data;
	old_state_change = tcp_conn->old_state_change;

	read_unlock(&sk->sk_callback_lock);

	old_state_change(sk);
}

/**
 * iscsi_write_space - Called when more output buffer space is available
 * @sk: socket space is available for
 **/
static void
iscsi_write_space(struct sock *sk)
{
	struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	tcp_conn->old_write_space(sk);
	debug_tcp("iscsi_write_space: cid %d\n", conn->id);
	scsi_queue_work(conn->session->host, &conn->xmitwork);
}

static void
iscsi_conn_set_callbacks(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct sock *sk = tcp_conn->sock->sk;

	/* assign new callbacks */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = conn;
	tcp_conn->old_data_ready = sk->sk_data_ready;
	tcp_conn->old_state_change = sk->sk_state_change;
	tcp_conn->old_write_space = sk->sk_write_space;
	sk->sk_data_ready = iscsi_tcp_data_ready;
	sk->sk_state_change = iscsi_tcp_state_change;
	sk->sk_write_space = iscsi_write_space;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void
iscsi_conn_restore_callbacks(struct iscsi_tcp_conn *tcp_conn)
{
	struct sock *sk = tcp_conn->sock->sk;

	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data    = NULL;
	sk->sk_data_ready   = tcp_conn->old_data_ready;
	sk->sk_state_change = tcp_conn->old_state_change;
	sk->sk_write_space  = tcp_conn->old_write_space;
	sk->sk_no_check	 = 0;
	write_unlock_bh(&sk->sk_callback_lock);
}

/**
 * iscsi_xmit - TCP transmit
 **/
static int
iscsi_xmit(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->out.segment;
	unsigned int consumed = 0;
	int rc = 0;

	while (1) {
		rc = iscsi_tcp_xmit_segment(tcp_conn, segment);
		if (rc < 0) {
			rc = ISCSI_ERR_XMIT_FAILED;
			goto error;
		}
		if (rc == 0)
			break;

		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			if (segment->done != NULL) {
				rc = segment->done(tcp_conn, segment);
				if (rc != 0)
					goto error;
			}
		}
	}

	debug_tcp("xmit %d bytes\n", consumed);

	conn->txdata_octets += consumed;
	return consumed;

error:
	/* Transmit error. We could initiate error recovery
	 * here. */
	debug_tcp("Error sending PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, rc);
	return -EIO;
}

/**
 * iscsi_tcp_xmit_qlen - return the number of bytes queued for xmit
 */
static inline int
iscsi_tcp_xmit_qlen(struct iscsi_conn *conn)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->out.segment;

	return segment->total_copied - segment->total_size;
}

static inline int
iscsi_tcp_flush(struct iscsi_conn *conn)
{
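	/* Keep pushing the queued output segment until it has all been
	 * sent; a zero-byte iscsi_xmit() means the socket could not take
	 * more data right now, so back off with -EAGAIN. */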
	int rc;

	while (iscsi_tcp_xmit_qlen(conn)) {
		rc = iscsi_xmit(conn);
		if (rc == 0)
			return -EAGAIN;
		if (rc < 0)
			return rc;
	}

	return 0;
}

/*
 * This is called when we're done sending the header.
 * Simply copy the data_segment to the send segment, and return.
 */
static int
iscsi_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn,
			struct iscsi_segment *segment)
{
	tcp_conn->out.segment = tcp_conn->out.data_segment;
	debug_tcp("Header done. Next segment size %u total_size %u\n",
		  tcp_conn->out.segment.size, tcp_conn->out.segment.total_size);
	return 0;
}

static void
iscsi_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr, size_t hdrlen)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	debug_tcp("%s(%p%s)\n", __func__, tcp_conn,
			conn->hdrdgst_en? ", digest enabled" : "");

	/* Clear the data segment - needs to be filled in by the
	 * caller using iscsi_tcp_send_data_prep() */
	memset(&tcp_conn->out.data_segment, 0, sizeof(struct iscsi_segment));

	/* If header digest is enabled, compute the CRC and
	 * place the digest into the same buffer. We make
	 * sure that both iscsi_tcp_task and mtask have
	 * sufficient room.
	 */
	if (conn->hdrdgst_en) {
		iscsi_tcp_dgst_header(&tcp_conn->tx_hash, hdr, hdrlen,
				      hdr + hdrlen);
		hdrlen += ISCSI_DIGEST_SIZE;
	}

	/* Remember header pointer for later, when we need
	 * to decide whether there's a payload to go along
	 * with the header. */
	tcp_conn->out.hdr = hdr;

	iscsi_segment_init_linear(&tcp_conn->out.segment, hdr, hdrlen,
				iscsi_tcp_send_hdr_done, NULL);
}

/*
 * Prepare the send buffer for the payload data.
 * Padding and checksumming will all be taken care
 * of by the iscsi_segment routines.
 */
static int
iscsi_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg,
			 unsigned int count, unsigned int offset,
			 unsigned int len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

	debug_tcp("%s(%p, offset=%d, datalen=%d%s)\n", __func__,
			tcp_conn, offset, len,
			conn->datadgst_en? ", digest enabled" : "");

	/* Make sure the datalen matches what the caller
	   said he would send. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
		tx_hash = &tcp_conn->tx_hash;

	return iscsi_segment_seek_sg(&tcp_conn->out.data_segment,
				   sg, count, offset, len,
				   NULL, tx_hash);
}

static void
iscsi_tcp_send_linear_data_prepare(struct iscsi_conn *conn, void *data,
				   size_t len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

	debug_tcp("%s(%p, datalen=%d%s)\n", __func__, tcp_conn, len,
		  conn->datadgst_en? ", digest enabled" : "");

	/* Make sure the datalen matches what the caller
	   said he would send. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
		tx_hash = &tcp_conn->tx_hash;

	iscsi_segment_init_linear(&tcp_conn->out.data_segment,
				data, len, NULL, tx_hash);
}

/**
 * iscsi_solicit_data_cont - initialize next Data-Out
 * @conn: iscsi connection
 * @task: scsi command task
 * @r2t: R2T info
 * @left: bytes left to transfer
 *
 * Notes:
 *	Initialize next Data-Out within this R2T sequence and continue
 *	to process next Scatter-Gather element(if any) of this SCSI command.
 *
 *	Called under connection lock.
 **/
static int
iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_task *task,
			struct iscsi_r2t_info *r2t)
{
	struct iscsi_data *hdr;
	int new_offset, left;

	BUG_ON(r2t->data_length - r2t->sent < 0);
	left = r2t->data_length - r2t->sent;
	if (left == 0)
		return 0;

	hdr = &r2t->dtask.hdr;
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
	memcpy(hdr->lun, task->hdr->lun, sizeof(hdr->lun));
	hdr->itt = task->hdr->itt;
	hdr->exp_statsn = r2t->exp_statsn;
	new_offset = r2t->data_offset + r2t->sent;
	hdr->offset = cpu_to_be32(new_offset);
	if (left > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
	} else {
		hton24(hdr->dlength, left);
		r2t->data_count = left;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}

	conn->dataout_pdus_cnt++;
	return 1;
}

/**
 * iscsi_tcp_task_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
 * @task: iscsi task (scsi command or mgmt task)
 **/
static int
iscsi_tcp_task_init(struct iscsi_task *task)
{
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct iscsi_conn *conn = task->conn;
	struct scsi_cmnd *sc = task->sc;
	int err;

	if (!sc) {
		/*
		 * mgmt tasks do not have a scatterlist since they come
		 * in from the iscsi interface.
		 */
		debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id,
			   task->itt);

		/* Prepare PDU, optionally w/ immediate data */
		iscsi_tcp_send_hdr_prep(conn, task->hdr, sizeof(*task->hdr));

		/* If we have immediate data, attach a payload */
		if (task->data_count)
			iscsi_tcp_send_linear_data_prepare(conn, task->data,
							   task->data_count);
		return 0;
	}

	BUG_ON(__kfifo_len(tcp_task->r2tqueue));
	tcp_task->sent = 0;
	tcp_task->exp_datasn = 0;

	/* Prepare PDU, optionally w/ immediate data */
	debug_scsi("task deq [cid %d itt 0x%x imm %d unsol %d]\n",
		    conn->id, task->itt, task->imm_count,
		    task->unsol_count);
	iscsi_tcp_send_hdr_prep(conn, task->hdr, task->hdr_len);

	if (!task->imm_count)
		return 0;

	/* If we have immediate data, attach a payload */
	err = iscsi_tcp_send_data_prep(conn, scsi_out(sc)->table.sgl,
				       scsi_out(sc)->table.nents,
				       0, task->imm_count);
	if (err)
		return err;
	tcp_task->sent += task->imm_count;
	task->imm_count = 0;
	return 0;
}

/*
 * iscsi_tcp_task_xmit - xmit normal PDU task
 * @task: iscsi command task
 *
 * We're expected to return 0 when everything was transmitted successfully,
 * -EAGAIN if there's still data in the queue, or != 0 for any other kind
 * of error.
 */
static int
iscsi_tcp_task_xmit(struct iscsi_task *task)
{
	struct iscsi_conn *conn = task->conn;
	struct iscsi_tcp_task *tcp_task = task->dd_data;
	struct scsi_cmnd *sc = task->sc;
	struct scsi_data_buffer *sdb;
	int rc = 0;

flush:
	/* Flush any pending data first. */
	rc = iscsi_tcp_flush(conn);
	if (rc < 0)
		return rc;

	/* mgmt command */
	if (!sc) {
		if (task->hdr->itt == RESERVED_ITT)
			iscsi_put_task(task);
		return 0;
	}

	/* Are we done already? */
	if (sc->sc_data_direction != DMA_TO_DEVICE)
		return 0;

	sdb = scsi_out(sc);
	if (task->unsol_count != 0) {
		struct iscsi_data *hdr = &tcp_task->unsol_dtask.hdr;

		/* Prepare a header for the unsolicited PDU.
		 * The amount of data we want to send will be
		 * in task->data_count.
		 * FIXME: return the data count instead.
		 */
		iscsi_prep_unsolicit_data_pdu(task, hdr);

		debug_tcp("unsol dout [itt 0x%x doff %d dlen %d]\n",
				task->itt, tcp_task->sent, task->data_count);

		iscsi_tcp_send_hdr_prep(conn, hdr, sizeof(*hdr));
		rc = iscsi_tcp_send_data_prep(conn, sdb->table.sgl,
					      sdb->table.nents, tcp_task->sent,
					      task->data_count);
		if (rc)
			goto fail;
		tcp_task->sent += task->data_count;
		task->unsol_count -= task->data_count;
		goto flush;
	} else {
		struct iscsi_session *session = conn->session;
		struct iscsi_r2t_info *r2t;

		/* All unsolicited PDUs sent. Check for solicited PDUs.
		 */
		spin_lock_bh(&session->lock);
		r2t = tcp_task->r2t;
		if (r2t != NULL) {
			/* Continue with this R2T? */
			if (!iscsi_solicit_data_cont(conn, task, r2t)) {
				debug_scsi("  done with r2t %p\n", r2t);

				__kfifo_put(tcp_task->r2tpool.queue,
					    (void*)&r2t, sizeof(void*));
				tcp_task->r2t = r2t = NULL;
			}
		}

		if (r2t == NULL) {
			__kfifo_get(tcp_task->r2tqueue, (void*)&tcp_task->r2t,
				    sizeof(void*));
			r2t = tcp_task->r2t;
		}
		spin_unlock_bh(&session->lock);

		/* Waiting for more R2Ts to arrive. */
		if (r2t == NULL) {
			debug_tcp("no R2Ts yet\n");
			return 0;
		}

		debug_scsi("sol dout %p [dsn %d itt 0x%x doff %d dlen %d]\n",
			r2t, r2t->solicit_datasn - 1, task->itt,
			r2t->data_offset + r2t->sent, r2t->data_count);

		iscsi_tcp_send_hdr_prep(conn, &r2t->dtask.hdr,
					sizeof(struct iscsi_hdr));

		rc = iscsi_tcp_send_data_prep(conn, sdb->table.sgl,
					      sdb->table.nents,
					      r2t->data_offset + r2t->sent,
					      r2t->data_count);
		if (rc)
			goto fail;
		tcp_task->sent += r2t->data_count;
		r2t->sent += r2t->data_count;
		goto flush;
	}
	return 0;
fail:
	iscsi_conn_failure(conn, rc);
	return -EIO;
}

static struct iscsi_cls_conn *
iscsi_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
{
	struct iscsi_conn *conn;
	struct iscsi_cls_conn *cls_conn;
	struct iscsi_tcp_conn *tcp_conn;

	cls_conn = iscsi_conn_setup(cls_session, sizeof(*tcp_conn), conn_idx);
	if (!cls_conn)
		return NULL;
	conn = cls_conn->dd_data;
	/*
	 * due to strange issues with iser these are not set
	 * in iscsi_conn_setup
	 */
	conn->max_recv_dlength = ISCSI_DEF_MAX_RECV_SEG_LEN;

	tcp_conn = conn->dd_data;
	tcp_conn->iscsi_conn = conn;

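	/* Allocate separate crc32c transforms for the Tx and Rx
	 * directions; they are used for header and data digest
	 * generation and verification. */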
	tcp_conn->tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						  CRYPTO_ALG_ASYNC);
	tcp_conn->tx_hash.flags = 0;
	if (IS_ERR(tcp_conn->tx_hash.tfm))
		goto free_conn;

	tcp_conn->rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						  CRYPTO_ALG_ASYNC);
	tcp_conn->rx_hash.flags = 0;
	if (IS_ERR(tcp_conn->rx_hash.tfm))
		goto free_tx_tfm;

	return cls_conn;

free_tx_tfm:
	crypto_free_hash(tcp_conn->tx_hash.tfm);
free_conn:
	iscsi_conn_printk(KERN_ERR, conn,
			  "Could not create connection due to crc32c "
			  "loading error. Make sure the crc32c "
			  "module is built as a module or into the "
			  "kernel\n");
	iscsi_conn_teardown(cls_conn);
	return NULL;
}

static void
iscsi_tcp_release_conn(struct iscsi_conn *conn)
{
	struct iscsi_session *session = conn->session;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct socket *sock = tcp_conn->sock;

	if (!sock)
		return;

	sock_hold(sock->sk);
	iscsi_conn_restore_callbacks(tcp_conn);
	sock_put(sock->sk);

	spin_lock_bh(&session->lock);
	tcp_conn->sock = NULL;
	spin_unlock_bh(&session->lock);
	sockfd_put(sock);
}

static void
iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	iscsi_tcp_release_conn(conn);

	if (tcp_conn->tx_hash.tfm)
		crypto_free_hash(tcp_conn->tx_hash.tfm);
	if (tcp_conn->rx_hash.tfm)
		crypto_free_hash(tcp_conn->rx_hash.tfm);

	iscsi_conn_teardown(cls_conn);
}

static void
iscsi_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	/* userspace may have goofed up and not bound us */
	if (!tcp_conn->sock)
		return;
	/*
	 * Make sure our recv side is stopped.
	 * Older tools called conn stop before ep_disconnect
	 * so IO could still be coming in.
	 */
	write_lock_bh(&tcp_conn->sock->sk->sk_callback_lock);
	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
	write_unlock_bh(&tcp_conn->sock->sk->sk_callback_lock);

	iscsi_conn_stop(cls_conn, flag);
	iscsi_tcp_release_conn(conn);
}

static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
			      char *buf, int *port,
			      int (*getname)(struct socket *, struct sockaddr *,
					int *addrlen))
{
	struct sockaddr_storage *addr;
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	int rc = 0, len;

	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
	if (!addr)
		return -ENOMEM;

	if (getname(sock, (struct sockaddr *) addr, &len)) {
		rc = -ENODEV;
		goto free_addr;
	}

	switch (addr->ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, "%pI4", &sin->sin_addr.s_addr);
		*port = be16_to_cpu(sin->sin_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, "%pI6", &sin6->sin6_addr);
		*port = be16_to_cpu(sin6->sin6_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	}
free_addr:
	kfree(addr);
	return rc;
}

static int
iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
		    struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
		    int is_leading)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
	struct iscsi_host *ihost = shost_priv(shost);
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct sock *sk;
	struct socket *sock;
	int err;

	/* lookup for existing socket */
	sock = sockfd_lookup((int)transport_eph, &err);
	if (!sock) {
		iscsi_conn_printk(KERN_ERR, conn,
				  "sockfd_lookup failed %d\n", err);
		return -EEXIST;
	}
	/*
	 * copy these values now because if we drop the session
	 * userspace may still want to query the values since we will
	 * be using them for the reconnect
	 */
	err = iscsi_tcp_get_addr(conn, sock, conn->portal_address,
				 &conn->portal_port, kernel_getpeername);
	if (err)
		goto free_socket;

	err = iscsi_tcp_get_addr(conn, sock, ihost->local_address,
				&ihost->local_port, kernel_getsockname);
	if (err)
		goto free_socket;

	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
	if (err)
		goto free_socket;

	/* bind iSCSI connection and socket */
	tcp_conn->sock = sock;

	/* setup Socket parameters */
	sk = sock->sk;
	sk->sk_reuse = 1;
	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
	sk->sk_allocation = GFP_ATOMIC;

	iscsi_conn_set_callbacks(conn);
	tcp_conn->sendpage = tcp_conn->sock->ops->sendpage;
	/*
	 * set receive state machine into initial state
	 */
	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;

free_socket:
	sockfd_put(sock);
	return err;
}

static int
iscsi_r2tpool_alloc(struct iscsi_session *session)
{
	int i;
	int cmd_i;

	/*
	 * initialize per-task: R2T pool and xmit queue
	 */
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
	        struct iscsi_task *task = session->cmds[cmd_i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;

		/*
		 * pre-allocate 4x as many r2ts to handle the race where the
		 * target acks DataOut faster than our data_xmit() can
		 * replenish the r2tqueue.
		 */

		/* R2T pool */
		if (iscsi_pool_init(&tcp_task->r2tpool, session->max_r2t * 4, NULL,
				    sizeof(struct iscsi_r2t_info))) {
			goto r2t_alloc_fail;
		}

		/* R2T xmit queue */
		tcp_task->r2tqueue = kfifo_alloc(
		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL);
		if (tcp_task->r2tqueue == ERR_PTR(-ENOMEM)) {
			iscsi_pool_free(&tcp_task->r2tpool);
			goto r2t_alloc_fail;
		}
	}

	return 0;

r2t_alloc_fail:
	for (i = 0; i < cmd_i; i++) {
1778 1779
		struct iscsi_task *task = session->cmds[i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;
1780

1781 1782
		kfifo_free(tcp_task->r2tqueue);
		iscsi_pool_free(&tcp_task->r2tpool);
1783 1784 1785 1786 1787 1788 1789 1790 1791 1792
	}
	return -ENOMEM;
}

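/**
 * iscsi_r2tpool_free - release the per-task R2T pools and xmit queues
 * @session: iscsi session
 */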
static void
iscsi_r2tpool_free(struct iscsi_session *session)
{
	int i;

	for (i = 0; i < session->cmds_max; i++) {
		struct iscsi_task *task = session->cmds[i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;

		kfifo_free(tcp_task->r2tqueue);
		iscsi_pool_free(&tcp_task->r2tpool);
	}
}

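/*
 * Connection parameters that need TCP-specific handling (digest settings
 * and MaxOutstandingR2T); everything else is passed straight through to
 * the generic iscsi_set_param().
 */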
static int
iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
		     char *buf, int buflen)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_session *session = conn->session;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	int value;

	switch (param) {
	case ISCSI_PARAM_HDRDGST_EN:
		iscsi_set_param(cls_conn, param, buf, buflen);
		break;
	case ISCSI_PARAM_DATADGST_EN:
		iscsi_set_param(cls_conn, param, buf, buflen);
		tcp_conn->sendpage = conn->datadgst_en ?
			sock_no_sendpage : tcp_conn->sock->ops->sendpage;
		break;
	case ISCSI_PARAM_MAX_R2T:
		sscanf(buf, "%d", &value);
		if (value <= 0 || !is_power_of_2(value))
			return -EINVAL;
		if (session->max_r2t == value)
			break;
		iscsi_r2tpool_free(session);
		iscsi_set_param(cls_conn, param, buf, buflen);
		if (iscsi_r2tpool_alloc(session))
			return -ENOMEM;
		break;
	default:
		return iscsi_set_param(cls_conn, param, buf, buflen);
	}

	return 0;
}

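/*
 * Report the portal port and address recorded at bind time; all other
 * parameters fall through to the generic iscsi_conn_get_param().
 */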
static int
iscsi_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
			 enum iscsi_param param, char *buf)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	int len;

	switch (param) {
	case ISCSI_PARAM_CONN_PORT:
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%hu\n", conn->portal_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	case ISCSI_PARAM_CONN_ADDRESS:
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%s\n", conn->portal_address);
		spin_unlock_bh(&conn->session->lock);
		break;
	default:
		return iscsi_conn_get_param(cls_conn, param, buf);
	}

	return len;
}

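/*
 * Fill in the generic PDU and octet counters plus the TCP-specific
 * custom statistics.
 */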
static void
iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	stats->txdata_octets = conn->txdata_octets;
	stats->rxdata_octets = conn->rxdata_octets;
	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
	stats->dataout_pdus = conn->dataout_pdus_cnt;
	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
	stats->datain_pdus = conn->datain_pdus_cnt;
	stats->r2t_pdus = conn->r2t_pdus_cnt;
	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
	stats->custom_length = 3;
	strcpy(stats->custom[0].desc, "tx_sendpage_failures");
	stats->custom[0].value = tcp_conn->sendpage_failures_cnt;
	strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
	stats->custom[1].value = tcp_conn->discontiguous_hdr_cnt;
	strcpy(stats->custom[2].desc, "eh_abort_cnt");
	stats->custom[2].value = conn->eh_abort_cnt;
}

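/*
 * Allocate and register a scsi host, set up the libiscsi session, point
 * each pre-allocated task at its TCP private header, and allocate the
 * R2T pools. Software iSCSI over TCP does not use endpoints, so the ep
 * argument must be NULL here.
 */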
static struct iscsi_cls_session *
iscsi_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
			 uint16_t qdepth, uint32_t initial_cmdsn,
			 uint32_t *hostno)
{
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
	struct Scsi_Host *shost;
	int cmd_i;

	if (ep) {
		printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);
		return NULL;
	}

	shost = iscsi_host_alloc(&iscsi_sht, 0, qdepth);
	if (!shost)
		return NULL;
	shost->transportt = iscsi_tcp_scsi_transport;
	shost->max_lun = iscsi_max_lun;
	shost->max_id = 0;
	shost->max_channel = 0;
	shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;

	if (iscsi_host_add(shost, NULL))
		goto free_host;
	*hostno = shost->host_no;

	cls_session = iscsi_session_setup(&iscsi_tcp_transport, shost, cmds_max,
					  sizeof(struct iscsi_tcp_task),
					  initial_cmdsn, 0);
	if (!cls_session)
		goto remove_host;
	session = cls_session->dd_data;

	shost->can_queue = session->scsi_cmds_max;
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
		struct iscsi_task *task = session->cmds[cmd_i];
		struct iscsi_tcp_task *tcp_task = task->dd_data;

		task->hdr = &tcp_task->hdr.cmd_hdr;
		task->hdr_max = sizeof(tcp_task->hdr) - ISCSI_DIGEST_SIZE;
	}

	if (iscsi_r2tpool_alloc(session))
		goto remove_session;
	return cls_session;

remove_session:
	iscsi_session_teardown(cls_session);
remove_host:
	iscsi_host_remove(shost);
free_host:
	iscsi_host_free(shost);
	return NULL;
}

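/*
 * Undo iscsi_tcp_session_create(): free the R2T pools, tear down the
 * libiscsi session and release the scsi host.
 */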
static void iscsi_tcp_session_destroy(struct iscsi_cls_session *cls_session)
{
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);

	iscsi_r2tpool_free(cls_session->dd_data);
	iscsi_session_teardown(cls_session);

	iscsi_host_remove(shost);
	iscsi_host_free(shost);
}

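/*
 * Data is copied to and from network buffers by the CPU, so any page is
 * usable (no bounce buffers) and no DMA alignment restriction is needed.
 */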
static int iscsi_tcp_slave_configure(struct scsi_device *sdev)
{
	blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_ANY);
	blk_queue_dma_alignment(sdev->request_queue, 0);
	return 0;
}

static struct scsi_host_template iscsi_sht = {
	.module			= THIS_MODULE,
	.name			= "iSCSI Initiator over TCP/IP",
	.queuecommand		= iscsi_queuecommand,
	.change_queue_depth	= iscsi_change_queue_depth,
	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
	.sg_tablesize		= 4096,
	.max_sectors		= 0xFFFF,
	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
	.eh_abort_handler	= iscsi_eh_abort,
	.eh_device_reset_handler= iscsi_eh_device_reset,
	.eh_target_reset_handler= iscsi_eh_target_reset,
	.use_clustering		= DISABLE_CLUSTERING,
	.slave_configure	= iscsi_tcp_slave_configure,
	.proc_name		= "iscsi_tcp",
	.this_id		= -1,
};

static struct iscsi_transport iscsi_tcp_transport = {
	.owner			= THIS_MODULE,
	.name			= "tcp",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
				  | CAP_DATADGST,
	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
				  ISCSI_MAX_XMIT_DLENGTH |
				  ISCSI_HDRDGST_EN |
				  ISCSI_DATADGST_EN |
				  ISCSI_INITIAL_R2T_EN |
				  ISCSI_MAX_R2T |
				  ISCSI_IMM_DATA_EN |
				  ISCSI_FIRST_BURST |
				  ISCSI_MAX_BURST |
				  ISCSI_PDU_INORDER_EN |
				  ISCSI_DATASEQ_INORDER_EN |
				  ISCSI_ERL |
				  ISCSI_CONN_PORT |
				  ISCSI_CONN_ADDRESS |
				  ISCSI_EXP_STATSN |
				  ISCSI_PERSISTENT_PORT |
				  ISCSI_PERSISTENT_ADDRESS |
				  ISCSI_TARGET_NAME | ISCSI_TPGT |
				  ISCSI_USERNAME | ISCSI_PASSWORD |
				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
				  ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
				  ISCSI_LU_RESET_TMO |
				  ISCSI_PING_TMO | ISCSI_RECV_TMO |
				  ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
				  ISCSI_HOST_INITIATOR_NAME |
				  ISCSI_HOST_NETDEV_NAME,
	/* session management */
	.create_session		= iscsi_tcp_session_create,
	.destroy_session	= iscsi_tcp_session_destroy,
	/* connection management */
	.create_conn		= iscsi_tcp_conn_create,
	.bind_conn		= iscsi_tcp_conn_bind,
	.destroy_conn		= iscsi_tcp_conn_destroy,
	.set_param		= iscsi_conn_set_param,
	.get_conn_param		= iscsi_tcp_conn_get_param,
	.get_session_param	= iscsi_session_get_param,
	.start_conn		= iscsi_conn_start,
	.stop_conn		= iscsi_tcp_conn_stop,
	/* iscsi host params */
	.get_host_param		= iscsi_host_get_param,
	.set_host_param		= iscsi_host_set_param,
	/* IO */
	.send_pdu		= iscsi_conn_send_pdu,
	.get_stats		= iscsi_conn_get_stats,
	.init_task		= iscsi_tcp_task_init,
	.xmit_task		= iscsi_tcp_task_xmit,
	.cleanup_task		= iscsi_tcp_cleanup_task,
	/* recovery */
	.session_recovery_timedout = iscsi_session_recovery_timedout,
};

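/* Validate module parameters and register with the iSCSI transport class. */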
static int __init
iscsi_tcp_init(void)
{
	if (iscsi_max_lun < 1) {
		printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
		       iscsi_max_lun);
		return -EINVAL;
	}

	iscsi_tcp_scsi_transport = iscsi_register_transport(
							&iscsi_tcp_transport);
	if (!iscsi_tcp_scsi_transport)
		return -ENODEV;

	return 0;
}

static void __exit
iscsi_tcp_exit(void)
{
	iscsi_unregister_transport(&iscsi_tcp_transport);
}

module_init(iscsi_tcp_init);
module_exit(iscsi_tcp_exit);