/*
 * iSCSI Initiator over TCP/IP Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
 * Copyright (C) 2005 - 2006 Mike Christie
 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
 * maintained by open-iscsi@googlegroups.com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * See the file COPYING included with this distribution for more details.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 */

#include <linux/types.h>
#include <linux/list.h>
#include <linux/inet.h>
#include <linux/file.h>
#include <linux/blkdev.h>
#include <linux/crypto.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
#include <net/tcp.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include "iscsi_tcp.h"

MODULE_AUTHOR("Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");
MODULE_DESCRIPTION("iSCSI/TCP data-path");
MODULE_LICENSE("GPL");

/*
 * Compile-time debug switches.
 *
 * DEBUG_TCP:    when defined, debug_tcp() expands to a KERN_INFO printk
 *               prefixed with "tcp: "; otherwise it expands to nothing.
 * DEBUG_ASSERT: when NOT defined, BUG_ON() is redefined below to expand
 *               to nothing, so its argument is not evaluated at all.
 */
#undef DEBUG_TCP
#define DEBUG_ASSERT

#ifdef DEBUG_TCP
#define debug_tcp(fmt...) printk(KERN_INFO "tcp: " fmt)
#else
#define debug_tcp(fmt...)
#endif

#ifndef DEBUG_ASSERT
#ifdef BUG_ON
#undef BUG_ON
#endif
#define BUG_ON(expr)
#endif

/* Module parameter "max_lun" (default 512), registered with S_IRUGO. */
static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

/*
 * Forward declaration: iscsi_tcp_hdr_recv_prep() installs this function
 * as the segment's done callback before its definition appears.
 */
static int iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
				   struct iscsi_segment *segment);

/*
 * Scatterlist handling: inside the iscsi_segment, we
 * remember an index into the scatterlist, and set data/size
 * to the current scatterlist entry. For highmem pages, we
 * kmap as needed.
 *
 * Note that the page is unmapped when we return from
 * TCP's data_ready handler, so we may end up mapping and
 * unmapping the same page repeatedly. The whole reason
 * for this is that we shouldn't keep the page mapped
 * outside the softirq.
 */

/**
87 88 89
 * iscsi_tcp_segment_init_sg - init indicated scatterlist entry
 * @segment: the buffer object
 * @sg: scatterlist
O
Olaf Kirch 已提交
90 91
 * @offset: byte offset into that sg entry
 *
92
 * This function sets up the segment so that subsequent
O
Olaf Kirch 已提交
93 94 95 96
 * data is copied to the indicated sg entry, at the given
 * offset.
 */
static inline void
97 98
iscsi_tcp_segment_init_sg(struct iscsi_segment *segment,
			  struct scatterlist *sg, unsigned int offset)
O
Olaf Kirch 已提交
99
{
100 101 102 103 104
	segment->sg = sg;
	segment->sg_offset = offset;
	segment->size = min(sg->length - offset,
			    segment->total_size - segment->total_copied);
	segment->data = NULL;
O
Olaf Kirch 已提交
105 106 107
}

/**
108 109 110
 * iscsi_tcp_segment_map - map the current S/G page
 * @segment: iscsi_segment
 * @recv: 1 if called from recv path
O
Olaf Kirch 已提交
111 112 113 114 115 116
 *
 * We only need to possibly kmap data if scatter lists are being used,
 * because the iscsi passthrough and internal IO paths will never use high
 * mem pages.
 */
static inline void
117
iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv)
O
Olaf Kirch 已提交
118 119 120
{
	struct scatterlist *sg;

121
	if (segment->data != NULL || !segment->sg)
O
Olaf Kirch 已提交
122 123
		return;

124 125
	sg = segment->sg;
	BUG_ON(segment->sg_mapped);
O
Olaf Kirch 已提交
126
	BUG_ON(sg->length == 0);
127 128 129 130 131 132 133 134 135 136 137 138 139 140

	/*
	 * If the page count is greater than one it is ok to send
	 * to the network layer's zero copy send path. If not we
	 * have to go the slow sendmsg path. We always map for the
	 * recv path.
	 */
	if (page_count(sg_page(sg)) >= 1 && !recv)
		return;

	debug_tcp("iscsi_tcp_segment_map %s %p\n", recv ? "recv" : "xmit",
		  segment);
	segment->sg_mapped = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
	segment->data = segment->sg_mapped + sg->offset + segment->sg_offset;
O
Olaf Kirch 已提交
141 142 143
}

static inline void
144
iscsi_tcp_segment_unmap(struct iscsi_segment *segment)
O
Olaf Kirch 已提交
145
{
146 147 148 149 150 151 152
	debug_tcp("iscsi_tcp_segment_unmap %p\n", segment);

	if (segment->sg_mapped) {
		debug_tcp("iscsi_tcp_segment_unmap valid\n");
		kunmap_atomic(segment->sg_mapped, KM_SOFTIRQ0);
		segment->sg_mapped = NULL;
		segment->data = NULL;
O
Olaf Kirch 已提交
153 154 155 156 157 158 159
	}
}

/*
 * Splice the digest buffer into the buffer
 */
static inline void
160
iscsi_tcp_segment_splice_digest(struct iscsi_segment *segment, void *digest)
O
Olaf Kirch 已提交
161
{
162 163 164 165 166 167 168
	segment->data = digest;
	segment->digest_len = ISCSI_DIGEST_SIZE;
	segment->total_size += ISCSI_DIGEST_SIZE;
	segment->size = ISCSI_DIGEST_SIZE;
	segment->copied = 0;
	segment->sg = NULL;
	segment->hash = NULL;
O
Olaf Kirch 已提交
169 170 171
}

/**
172 173 174 175
 * iscsi_tcp_segment_done - check whether the segment is complete
 * @segment: iscsi segment to check
 * @recv: set to one of this is called from the recv path
 * @copied: number of bytes copied
O
Olaf Kirch 已提交
176
 *
177
 * Check if we're done receiving this segment. If the receive
O
Olaf Kirch 已提交
178 179 180 181 182 183 184 185
 * buffer is full but we expect more data, move on to the
 * next entry in the scatterlist.
 *
 * If the amount of data we received isn't a multiple of 4,
 * we will transparently receive the pad bytes, too.
 *
 * This function must be re-entrant.
 */
186
static inline int
187
iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv, unsigned copied)
188
{
O
Olaf Kirch 已提交
189
	static unsigned char padbuf[ISCSI_PAD_LEN];
190
	struct scatterlist sg;
191
	unsigned int pad;
O
Olaf Kirch 已提交
192

193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
	debug_tcp("copied %u %u size %u %s\n", segment->copied, copied,
		  segment->size, recv ? "recv" : "xmit");
	if (segment->hash && copied) {
		/*
		 * If a segment is kmapd we must unmap it before sending
		 * to the crypto layer since that will try to kmap it again.
		 */
		iscsi_tcp_segment_unmap(segment);

		if (!segment->data) {
			sg_init_table(&sg, 1);
			sg_set_page(&sg, sg_page(segment->sg), copied,
				    segment->copied + segment->sg_offset +
							segment->sg->offset);
		} else
			sg_init_one(&sg, segment->data + segment->copied,
				    copied);
		crypto_hash_update(segment->hash, &sg, copied);
	}

	segment->copied += copied;
	if (segment->copied < segment->size) {
		iscsi_tcp_segment_map(segment, recv);
O
Olaf Kirch 已提交
216 217
		return 0;
	}
218

219 220 221
	segment->total_copied += segment->copied;
	segment->copied = 0;
	segment->size = 0;
222

O
Olaf Kirch 已提交
223
	/* Unmap the current scatterlist page, if there is one. */
224
	iscsi_tcp_segment_unmap(segment);
O
Olaf Kirch 已提交
225 226

	/* Do we have more scatterlist entries? */
227 228 229
	debug_tcp("total copied %u total size %u\n", segment->total_copied,
		   segment->total_size);
	if (segment->total_copied < segment->total_size) {
O
Olaf Kirch 已提交
230
		/* Proceed to the next entry in the scatterlist. */
231 232 233 234
		iscsi_tcp_segment_init_sg(segment, sg_next(segment->sg),
					  0);
		iscsi_tcp_segment_map(segment, recv);
		BUG_ON(segment->size == 0);
O
Olaf Kirch 已提交
235 236 237 238
		return 0;
	}

	/* Do we need to handle padding? */
239
	pad = iscsi_padding(segment->total_copied);
240
	if (pad != 0) {
O
Olaf Kirch 已提交
241
		debug_tcp("consume %d pad bytes\n", pad);
242 243 244
		segment->total_size += pad;
		segment->size = pad;
		segment->data = padbuf;
O
Olaf Kirch 已提交
245 246 247 248
		return 0;
	}

	/*
249
	 * Set us up for transferring the data digest. hdr digest
O
Olaf Kirch 已提交
250 251
	 * is completely handled in hdr done function.
	 */
252 253 254 255 256
	if (segment->hash) {
		crypto_hash_final(segment->hash, segment->digest);
		iscsi_tcp_segment_splice_digest(segment,
				 recv ? segment->recv_digest : segment->digest);
		return 0;
O
Olaf Kirch 已提交
257
	}
258

O
Olaf Kirch 已提交
259 260
	return 1;
}
261

O
Olaf Kirch 已提交
262
/**
263
 * iscsi_tcp_xmit_segment - transmit segment
O
Olaf Kirch 已提交
264
 * @tcp_conn: the iSCSI TCP connection
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325
 * @segment: the buffer to transmnit
 *
 * This function transmits as much of the buffer as
 * the network layer will accept, and returns the number of
 * bytes transmitted.
 *
 * If CRC hashing is enabled, the function will compute the
 * hash as it goes. When the entire segment has been transmitted,
 * it will retrieve the hash value and send it as well.
 */
static int
iscsi_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment)
{
	struct socket *sk = tcp_conn->sock;
	unsigned int copied = 0;
	int r = 0;

	while (!iscsi_tcp_segment_done(segment, 0, r)) {
		struct scatterlist *sg;
		unsigned int offset, copy;
		int flags = 0;

		r = 0;
		offset = segment->copied;
		copy = segment->size - offset;

		if (segment->total_copied + segment->size < segment->total_size)
			flags |= MSG_MORE;

		/* Use sendpage if we can; else fall back to sendmsg */
		if (!segment->data) {
			sg = segment->sg;
			offset += segment->sg_offset + sg->offset;
			r = tcp_conn->sendpage(sk, sg_page(sg), offset, copy,
					       flags);
		} else {
			struct msghdr msg = { .msg_flags = flags };
			struct kvec iov = {
				.iov_base = segment->data + offset,
				.iov_len = copy
			};

			r = kernel_sendmsg(sk, &msg, &iov, 1, copy);
		}

		if (r < 0) {
			iscsi_tcp_segment_unmap(segment);
			if (copied || r == -EAGAIN)
				break;
			return r;
		}
		copied += r;
	}
	return copied;
}

/**
 * iscsi_tcp_segment_recv - copy data to segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to copy to
O
Olaf Kirch 已提交
326 327 328 329 330 331 332 333 334 335 336 337 338 339
 * @ptr: data pointer
 * @len: amount of data available
 *
 * This function copies up to @len bytes to the
 * given buffer, and returns the number of bytes
 * consumed, which can actually be less than @len.
 *
 * If hash digest is enabled, the function will update the
 * hash while copying.
 * Combining these two operations doesn't buy us a lot (yet),
 * but in the future we could implement combined copy+crc,
 * just way we do for network layer checksums.
 */
static int
340 341 342
iscsi_tcp_segment_recv(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment, const void *ptr,
		       unsigned int len)
O
Olaf Kirch 已提交
343
{
344
	unsigned int copy = 0, copied = 0;
345

346 347 348 349 350
	while (!iscsi_tcp_segment_done(segment, 1, copy)) {
		if (copied == len) {
			debug_tcp("iscsi_tcp_segment_recv copied %d bytes\n",
				  len);
			break;
O
Olaf Kirch 已提交
351
		}
352 353 354 355

		copy = min(len - copied, segment->size - segment->copied);
		debug_tcp("iscsi_tcp_segment_recv copying %d\n", copy);
		memcpy(segment->data + segment->copied, ptr + copied, copy);
O
Olaf Kirch 已提交
356 357 358 359 360 361 362 363 364 365
		copied += copy;
	}
	return copied;
}

/*
 * Compute the digest of a PDU header.
 *
 * Runs @hash in one shot over the @hdrlen bytes at @hdr and stores
 * the result in @digest.
 */
static inline void
iscsi_tcp_dgst_header(struct hash_desc *hash, const void *hdr, size_t hdrlen,
		      unsigned char digest[ISCSI_DIGEST_SIZE])
{
	struct scatterlist sg;

	sg_init_one(&sg, hdr, hdrlen);
	crypto_hash_digest(hash, &sg, hdrlen, digest);
}

static inline int
iscsi_tcp_dgst_verify(struct iscsi_tcp_conn *tcp_conn,
373
		      struct iscsi_segment *segment)
O
Olaf Kirch 已提交
374
{
375
	if (!segment->digest_len)
O
Olaf Kirch 已提交
376 377
		return 1;

378 379
	if (memcmp(segment->recv_digest, segment->digest,
		   segment->digest_len)) {
O
Olaf Kirch 已提交
380 381 382 383 384 385 386 387
		debug_scsi("digest mismatch\n");
		return 0;
	}

	return 1;
}

/*
388
 * Helper function to set up segment buffer
O
Olaf Kirch 已提交
389 390
 */
static inline void
391 392
__iscsi_segment_init(struct iscsi_segment *segment, size_t size,
		     iscsi_segment_done_fn_t *done, struct hash_desc *hash)
O
Olaf Kirch 已提交
393
{
394 395 396
	memset(segment, 0, sizeof(*segment));
	segment->total_size = size;
	segment->done = done;
O
Olaf Kirch 已提交
397 398

	if (hash) {
399
		segment->hash = hash;
O
Olaf Kirch 已提交
400 401 402 403 404
		crypto_hash_init(hash);
	}
}

static inline void
405 406 407
iscsi_segment_init_linear(struct iscsi_segment *segment, void *data,
			  size_t size, iscsi_segment_done_fn_t *done,
			  struct hash_desc *hash)
O
Olaf Kirch 已提交
408
{
409 410 411
	__iscsi_segment_init(segment, size, done, hash);
	segment->data = data;
	segment->size = size;
O
Olaf Kirch 已提交
412 413 414
}

static inline int
415 416 417 418
iscsi_segment_seek_sg(struct iscsi_segment *segment,
		      struct scatterlist *sg_list, unsigned int sg_count,
		      unsigned int offset, size_t size,
		      iscsi_segment_done_fn_t *done, struct hash_desc *hash)
O
Olaf Kirch 已提交
419
{
420
	struct scatterlist *sg;
O
Olaf Kirch 已提交
421 422
	unsigned int i;

423 424 425 426 427 428 429 430
	debug_scsi("iscsi_segment_seek_sg offset %u size %llu\n",
		  offset, size);
	__iscsi_segment_init(segment, size, done, hash);
	for_each_sg(sg_list, sg, sg_count, i) {
		debug_scsi("sg %d, len %u offset %u\n", i, sg->length,
			   sg->offset);
		if (offset < sg->length) {
			iscsi_tcp_segment_init_sg(segment, sg, offset);
O
Olaf Kirch 已提交
431
			return 0;
432
		}
433
		offset -= sg->length;
434 435
	}

O
Olaf Kirch 已提交
436 437 438 439
	return ISCSI_ERR_DATA_OFFSET;
}

/**
440
 * iscsi_tcp_hdr_recv_prep - prep segment for hdr reception
O
Olaf Kirch 已提交
441 442 443 444 445 446 447 448 449 450 451
 * @tcp_conn: iscsi connection to prep for
 *
 * This function always passes NULL for the hash argument, because when this
 * function is called we do not yet know the final size of the header and want
 * to delay the digest processing until we know that.
 */
static void
iscsi_tcp_hdr_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	debug_tcp("iscsi_tcp_hdr_recv_prep(%p%s)\n", tcp_conn,
		  tcp_conn->iscsi_conn->hdrdgst_en ? ", digest enabled" : "");
452
	iscsi_segment_init_linear(&tcp_conn->in.segment,
O
Olaf Kirch 已提交
453 454 455 456 457 458 459 460 461
				tcp_conn->in.hdr_buf, sizeof(struct iscsi_hdr),
				iscsi_tcp_hdr_recv_done, NULL);
}

/*
 * Handle incoming reply to any other type of command
 */
static int
iscsi_tcp_data_recv_done(struct iscsi_tcp_conn *tcp_conn,
462
			 struct iscsi_segment *segment)
O
Olaf Kirch 已提交
463 464 465 466
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	int rc = 0;

467
	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
O
Olaf Kirch 已提交
468 469 470 471 472 473 474 475
		return ISCSI_ERR_DATA_DGST;

	rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr,
			conn->data, tcp_conn->in.datalen);
	if (rc)
		return rc;

	iscsi_tcp_hdr_recv_prep(tcp_conn);
476 477 478
	return 0;
}

O
Olaf Kirch 已提交
479 480 481 482 483 484 485 486 487
static void
iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct hash_desc *rx_hash = NULL;

	if (conn->datadgst_en)
		rx_hash = &tcp_conn->rx_hash;

488
	iscsi_segment_init_linear(&tcp_conn->in.segment,
O
Olaf Kirch 已提交
489 490 491 492
				conn->data, tcp_conn->in.datalen,
				iscsi_tcp_data_recv_done, rx_hash);
}

M
Mike Christie 已提交
493 494 495 496
/*
 * must be called with session lock
 */
static void
497
iscsi_tcp_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
498
{
499
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
500
	struct iscsi_r2t_info *r2t;
501

502 503 504 505 506 507 508
	/* flush ctask's r2t queues */
	while (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*))) {
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		debug_scsi("iscsi_tcp_cleanup_ctask pending r2t dropped\n");
	}

509 510 511 512 513 514
	r2t = tcp_ctask->r2t;
	if (r2t != NULL) {
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		tcp_ctask->r2t = NULL;
	}
515 516 517 518 519 520 521 522 523 524
}

/**
 * iscsi_data_rsp - SCSI Data-In Response processing
 * @conn: iscsi connection
 * @ctask: scsi command task
 **/
static int
iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
{
525 526 527
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
528
	struct iscsi_session *session = conn->session;
529
	struct scsi_cmnd *sc = ctask->sc;
530 531
	int datasn = be32_to_cpu(rhdr->datasn);

532
	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
533
	if (tcp_conn->in.datalen == 0)
534 535
		return 0;

536 537 538
	if (tcp_ctask->exp_datasn != datasn) {
		debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->datasn(%d)\n",
		          __FUNCTION__, tcp_ctask->exp_datasn, datasn);
539
		return ISCSI_ERR_DATASN;
540
	}
541

542
	tcp_ctask->exp_datasn++;
543

544
	tcp_ctask->data_offset = be32_to_cpu(rhdr->offset);
545
	if (tcp_ctask->data_offset + tcp_conn->in.datalen > scsi_bufflen(sc)) {
546 547
		debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n",
		          __FUNCTION__, tcp_ctask->data_offset,
548
		          tcp_conn->in.datalen, scsi_bufflen(sc));
549
		return ISCSI_ERR_DATA_OFFSET;
550
	}
551 552

	if (rhdr->flags & ISCSI_FLAG_DATA_STATUS) {
553
		sc->result = (DID_OK << 16) | rhdr->cmd_status;
554
		conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
555 556
		if (rhdr->flags & (ISCSI_FLAG_DATA_UNDERFLOW |
		                   ISCSI_FLAG_DATA_OVERFLOW)) {
557 558 559
			int res_count = be32_to_cpu(rhdr->residual_count);

			if (res_count > 0 &&
560 561
			    (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW ||
			     res_count <= scsi_bufflen(sc)))
562
				scsi_set_resid(sc, res_count);
563
			else
564 565
				sc->result = (DID_BAD_TARGET << 16) |
					rhdr->cmd_status;
566
		}
567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590
	}

	conn->datain_pdus_cnt++;
	return 0;
}

/**
 * iscsi_solicit_data_init - initialize first Data-Out
 * @conn: iscsi connection
 * @ctask: scsi command task
 * @r2t: R2T info
 *
 * Notes:
 *	Initialize first Data-Out within this R2T sequence and finds
 *	proper data_offset within this SCSI command.
 *
 *	This function is called with connection lock taken.
 **/
static void
iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
			struct iscsi_r2t_info *r2t)
{
	struct iscsi_data *hdr;

591
	hdr = &r2t->dtask.hdr;
592 593 594 595 596
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
597 598
	memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
	hdr->itt = ctask->hdr->itt;
599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624
	hdr->exp_statsn = r2t->exp_statsn;
	hdr->offset = cpu_to_be32(r2t->data_offset);
	if (r2t->data_length > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
		hdr->flags = 0;
	} else {
		hton24(hdr->dlength, r2t->data_length);
		r2t->data_count = r2t->data_length;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}
	conn->dataout_pdus_cnt++;

	r2t->sent = 0;
}

/**
 * iscsi_r2t_rsp - iSCSI R2T Response processing
 * @conn: iscsi connection
 * @ctask: scsi command task
 **/
static int
iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
{
	struct iscsi_r2t_info *r2t;
	struct iscsi_session *session = conn->session;
625 626 627
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
628 629 630
	int r2tsn = be32_to_cpu(rhdr->r2tsn);
	int rc;

631 632 633
	if (tcp_conn->in.datalen) {
		printk(KERN_ERR "iscsi_tcp: invalid R2t with datalen %d\n",
		       tcp_conn->in.datalen);
634
		return ISCSI_ERR_DATALEN;
635
	}
636

637 638 639
	if (tcp_ctask->exp_datasn != r2tsn){
		debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
		          __FUNCTION__, tcp_ctask->exp_datasn, r2tsn);
640
		return ISCSI_ERR_R2TSN;
641
	}
642 643

	/* fill-in new R2T associated with the task */
644 645
	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);

646
	if (!ctask->sc || session->state != ISCSI_STATE_LOGGED_IN) {
647 648 649 650
		printk(KERN_INFO "iscsi_tcp: dropping R2T itt %d in "
		       "recovery...\n", ctask->itt);
		return 0;
	}
651

652
	rc = __kfifo_get(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
653 654 655 656
	BUG_ON(!rc);

	r2t->exp_statsn = rhdr->statsn;
	r2t->data_length = be32_to_cpu(rhdr->data_length);
657 658
	if (r2t->data_length == 0) {
		printk(KERN_ERR "iscsi_tcp: invalid R2T with zero data len\n");
659 660
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
661 662 663
		return ISCSI_ERR_DATALEN;
	}

664 665 666 667 668
	if (r2t->data_length > session->max_burst)
		debug_scsi("invalid R2T with data len %u and max burst %u."
			   "Attempting to execute request.\n",
			    r2t->data_length, session->max_burst);

669
	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
670
	if (r2t->data_offset + r2t->data_length > scsi_bufflen(ctask->sc)) {
671 672
		printk(KERN_ERR "iscsi_tcp: invalid R2T with data len %u at "
		       "offset %u and total length %d\n", r2t->data_length,
673
		       r2t->data_offset, scsi_bufflen(ctask->sc));
674 675
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
676 677 678 679 680 681 682 683
		return ISCSI_ERR_DATALEN;
	}

	r2t->ttt = rhdr->ttt; /* no flip */
	r2t->solicit_datasn = 0;

	iscsi_solicit_data_init(conn, ctask, r2t);

684
	tcp_ctask->exp_datasn = r2tsn + 1;
685
	__kfifo_put(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*));
686
	conn->r2t_pdus_cnt++;
687 688

	iscsi_requeue_ctask(ctask);
689 690 691
	return 0;
}

O
Olaf Kirch 已提交
692 693 694 695 696
/*
 * Handle incoming reply to DataIn command
 */
static int
iscsi_tcp_process_data_in(struct iscsi_tcp_conn *tcp_conn,
697
			  struct iscsi_segment *segment)
O
Olaf Kirch 已提交
698 699 700 701 702
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr = tcp_conn->in.hdr;
	int rc;

703
	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
O
Olaf Kirch 已提交
704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726
		return ISCSI_ERR_DATA_DGST;

	/* check for non-exceptional status */
	if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
		rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
		if (rc)
			return rc;
	}

	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

/**
 * iscsi_tcp_hdr_dissect - process PDU header
 * @conn: iSCSI connection
 * @hdr: PDU header
 *
 * This function analyzes the header of the PDU received,
 * and performs several sanity checks. If the PDU is accompanied
 * by data, the receive buffer is set up to copy the incoming data
 * to the correct location.
 */
727
static int
O
Olaf Kirch 已提交
728
iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
729
{
730
	int rc = 0, opcode, ahslen;
731
	struct iscsi_session *session = conn->session;
732
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
O
Olaf Kirch 已提交
733 734
	struct iscsi_cmd_task *ctask;
	uint32_t itt;
735 736

	/* verify PDU length */
737 738
	tcp_conn->in.datalen = ntoh24(hdr->dlength);
	if (tcp_conn->in.datalen > conn->max_recv_dlength) {
739
		printk(KERN_ERR "iscsi_tcp: datalen %d > %d\n",
740
		       tcp_conn->in.datalen, conn->max_recv_dlength);
741 742 743
		return ISCSI_ERR_DATALEN;
	}

O
Olaf Kirch 已提交
744 745 746
	/* Additional header segments. So far, we don't
	 * process additional headers.
	 */
747
	ahslen = hdr->hlength << 2;
748

749
	opcode = hdr->opcode & ISCSI_OPCODE_MASK;
750
	/* verify itt (itt encoding: age+cid+itt) */
751
	rc = iscsi_verify_itt(conn, hdr, &itt);
752
	if (rc)
753
		return rc;
754

O
Olaf Kirch 已提交
755 756
	debug_tcp("opcode 0x%x ahslen %d datalen %d\n",
		  opcode, ahslen, tcp_conn->in.datalen);
757

758 759
	switch(opcode) {
	case ISCSI_OP_SCSI_DATA_IN:
O
Olaf Kirch 已提交
760
		ctask = session->cmds[itt];
761
		spin_lock(&conn->session->lock);
O
Olaf Kirch 已提交
762
		rc = iscsi_data_rsp(conn, ctask);
763
		spin_unlock(&conn->session->lock);
764 765
		if (rc)
			return rc;
O
Olaf Kirch 已提交
766 767 768 769 770 771 772
		if (tcp_conn->in.datalen) {
			struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
			struct hash_desc *rx_hash = NULL;

			/*
			 * Setup copy of Data-In into the Scsi_Cmnd
			 * Scatterlist case:
773
			 * We set up the iscsi_segment to point to the next
O
Olaf Kirch 已提交
774 775 776 777 778 779 780 781 782 783 784
			 * scatterlist entry to copy to. As we go along,
			 * we move on to the next scatterlist entry and
			 * update the digest per-entry.
			 */
			if (conn->datadgst_en)
				rx_hash = &tcp_conn->rx_hash;

			debug_tcp("iscsi_tcp_begin_data_in(%p, offset=%d, "
				  "datalen=%d)\n", tcp_conn,
				  tcp_ctask->data_offset,
				  tcp_conn->in.datalen);
785 786 787 788 789 790 791
			return iscsi_segment_seek_sg(&tcp_conn->in.segment,
						     scsi_sglist(ctask->sc),
						     scsi_sg_count(ctask->sc),
						     tcp_ctask->data_offset,
						     tcp_conn->in.datalen,
						     iscsi_tcp_process_data_in,
						     rx_hash);
O
Olaf Kirch 已提交
792
		}
793 794
		/* fall through */
	case ISCSI_OP_SCSI_CMD_RSP:
O
Olaf Kirch 已提交
795 796 797 798 799
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
800 801
		break;
	case ISCSI_OP_R2T:
O
Olaf Kirch 已提交
802
		ctask = session->cmds[itt];
803 804
		if (ahslen)
			rc = ISCSI_ERR_AHSLEN;
805 806
		else if (ctask->sc->sc_data_direction == DMA_TO_DEVICE) {
			spin_lock(&session->lock);
O
Olaf Kirch 已提交
807
			rc = iscsi_r2t_rsp(conn, ctask);
808 809
			spin_unlock(&session->lock);
		} else
810 811 812 813 814 815
			rc = ISCSI_ERR_PROTO;
		break;
	case ISCSI_OP_LOGIN_RSP:
	case ISCSI_OP_TEXT_RSP:
	case ISCSI_OP_REJECT:
	case ISCSI_OP_ASYNC_EVENT:
816 817 818 819 820
		/*
		 * It is possible that we could get a PDU with a buffer larger
		 * than 8K, but there are no targets that currently do this.
		 * For now we fail until we find a vendor that needs it
		 */
O
Olaf Kirch 已提交
821
		if (ISCSI_DEF_MAX_RECV_SEG_LEN < tcp_conn->in.datalen) {
822 823 824
			printk(KERN_ERR "iscsi_tcp: received buffer of len %u "
			      "but conn buffer is only %u (opcode %0x)\n",
			      tcp_conn->in.datalen,
825
			      ISCSI_DEF_MAX_RECV_SEG_LEN, opcode);
826 827 828 829
			rc = ISCSI_ERR_PROTO;
			break;
		}

O
Olaf Kirch 已提交
830 831 832 833 834 835 836
		/* If there's data coming in with the response,
		 * receive it to the connection's buffer.
		 */
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
837
	/* fall through */
838 839
	case ISCSI_OP_LOGOUT_RSP:
	case ISCSI_OP_NOOP_IN:
840 841 842 843 844 845 846
	case ISCSI_OP_SCSI_TMFUNC_RSP:
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
		break;
	default:
		rc = ISCSI_ERR_BAD_OPCODE;
		break;
	}
847

O
Olaf Kirch 已提交
848 849 850 851 852 853
	if (rc == 0) {
		/* Anything that comes with data should have
		 * been handled above. */
		if (tcp_conn->in.datalen)
			return ISCSI_ERR_PROTO;
		iscsi_tcp_hdr_recv_prep(tcp_conn);
854 855
	}

O
Olaf Kirch 已提交
856
	return rc;
857 858
}

O
Olaf Kirch 已提交
859 860 861 862 863 864 865 866 867
/**
 * iscsi_tcp_hdr_recv_done - process PDU header
 *
 * This is the callback invoked when the PDU header has
 * been received. If the header is followed by additional
 * header segments, we go back for more data.
 */
static int
iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
868
			struct iscsi_segment *segment)
869
{
O
Olaf Kirch 已提交
870 871
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr;
872

O
Olaf Kirch 已提交
873 874 875 876 877
	/* Check if there are additional header segments
	 * *prior* to computing the digest, because we
	 * may need to go back to the caller for more.
	 */
	hdr = (struct iscsi_hdr *) tcp_conn->in.hdr_buf;
878
	if (segment->copied == sizeof(struct iscsi_hdr) && hdr->hlength) {
O
Olaf Kirch 已提交
879 880 881 882 883 884 885 886 887
		/* Bump the header length - the caller will
		 * just loop around and get the AHS for us, and
		 * call again. */
		unsigned int ahslen = hdr->hlength << 2;

		/* Make sure we don't overflow */
		if (sizeof(*hdr) + ahslen > sizeof(tcp_conn->in.hdr_buf))
			return ISCSI_ERR_AHSLEN;

888 889
		segment->total_size += ahslen;
		segment->size += ahslen;
O
Olaf Kirch 已提交
890
		return 0;
891 892
	}

O
Olaf Kirch 已提交
893 894 895 896
	/* We're done processing the header. See if we're doing
	 * header digests; if so, set up the recv_digest buffer
	 * and go back for more. */
	if (conn->hdrdgst_en) {
897 898 899
		if (segment->digest_len == 0) {
			iscsi_tcp_segment_splice_digest(segment,
							segment->recv_digest);
O
Olaf Kirch 已提交
900
			return 0;
901
		}
O
Olaf Kirch 已提交
902
		iscsi_tcp_dgst_header(&tcp_conn->rx_hash, hdr,
903 904
				      segment->total_copied - ISCSI_DIGEST_SIZE,
				      segment->digest);
905

906
		if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
O
Olaf Kirch 已提交
907
			return ISCSI_ERR_HDR_DGST;
908
	}
O
Olaf Kirch 已提交
909 910 911

	tcp_conn->in.hdr = hdr;
	return iscsi_tcp_hdr_dissect(conn, hdr);
912 913 914
}

/**
O
Olaf Kirch 已提交
915
 * iscsi_tcp_recv - TCP receive in sendfile fashion
916 917 918 919 920 921
 * @rd_desc: read descriptor
 * @skb: socket buffer
 * @offset: offset in skb
 * @len: skb->len - offset
 **/
static int
O
Olaf Kirch 已提交
922 923
iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
	       unsigned int offset, size_t len)
924 925
{
	struct iscsi_conn *conn = rd_desc->arg.data;
926
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
927
	struct iscsi_segment *segment = &tcp_conn->in.segment;
O
Olaf Kirch 已提交
928 929 930
	struct skb_seq_state seq;
	unsigned int consumed = 0;
	int rc = 0;
931

O
Olaf Kirch 已提交
932
	debug_tcp("in %d bytes\n", skb->len - offset);
933 934 935 936 937 938

	if (unlikely(conn->suspend_rx)) {
		debug_tcp("conn %d Rx suspended!\n", conn->id);
		return 0;
	}

O
Olaf Kirch 已提交
939 940 941 942
	skb_prepare_seq_read(skb, offset, skb->len, &seq);
	while (1) {
		unsigned int avail;
		const u8 *ptr;
943

O
Olaf Kirch 已提交
944
		avail = skb_seq_read(consumed, &ptr, &seq);
945 946 947
		if (avail == 0) {
			debug_tcp("no more data avail. Consumed %d\n",
				  consumed);
O
Olaf Kirch 已提交
948
			break;
949 950
		}
		BUG_ON(segment->copied >= segment->size);
O
Olaf Kirch 已提交
951 952

		debug_tcp("skb %p ptr=%p avail=%u\n", skb, ptr, avail);
953
		rc = iscsi_tcp_segment_recv(tcp_conn, segment, ptr, avail);
O
Olaf Kirch 已提交
954 955 956
		BUG_ON(rc == 0);
		consumed += rc;

957 958 959
		if (segment->total_copied >= segment->total_size) {
			debug_tcp("segment done\n");
			rc = segment->done(tcp_conn, segment);
O
Olaf Kirch 已提交
960 961 962
			if (rc != 0) {
				skb_abort_seq_read(&seq);
				goto error;
963
			}
964

O
Olaf Kirch 已提交
965
			/* The done() functions sets up the
966
			 * next segment. */
967 968
		}
	}
969
	skb_abort_seq_read(&seq);
O
Olaf Kirch 已提交
970 971
	conn->rxdata_octets += consumed;
	return consumed;
972

O
Olaf Kirch 已提交
973 974 975 976
error:
	debug_tcp("Error receiving PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	return 0;
977 978 979 980 981 982
}

static void
iscsi_tcp_data_ready(struct sock *sk, int flag)
{
	struct iscsi_conn *conn = sk->sk_user_data;
O
Olaf Kirch 已提交
983
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
984 985 986 987
	read_descriptor_t rd_desc;

	read_lock(&sk->sk_callback_lock);

988
	/*
O
Olaf Kirch 已提交
989
	 * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
990
	 * We set count to 1 because we want the network layer to
O
Olaf Kirch 已提交
991
	 * hand us all the skbs that are available. iscsi_tcp_recv
992 993
	 * handled pdus that cross buffers or pdus that still need data.
	 */
994
	rd_desc.arg.data = conn;
995
	rd_desc.count = 1;
O
Olaf Kirch 已提交
996
	tcp_read_sock(sk, &rd_desc, iscsi_tcp_recv);
997 998

	read_unlock(&sk->sk_callback_lock);
O
Olaf Kirch 已提交
999 1000 1001

	/* If we had to (atomically) map a highmem page,
	 * unmap it now. */
1002
	iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
1003 1004 1005 1006 1007
}

static void
iscsi_tcp_state_change(struct sock *sk)
{
1008
	struct iscsi_tcp_conn *tcp_conn;
1009 1010 1011 1012 1013 1014 1015 1016 1017
	struct iscsi_conn *conn;
	struct iscsi_session *session;
	void (*old_state_change)(struct sock *);

	read_lock(&sk->sk_callback_lock);

	conn = (struct iscsi_conn*)sk->sk_user_data;
	session = conn->session;

M
Mike Christie 已提交
1018 1019 1020
	if ((sk->sk_state == TCP_CLOSE_WAIT ||
	     sk->sk_state == TCP_CLOSE) &&
	    !atomic_read(&sk->sk_rmem_alloc)) {
1021 1022 1023 1024
		debug_tcp("iscsi_tcp_state_change: TCP_CLOSE|TCP_CLOSE_WAIT\n");
		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	}

1025 1026
	tcp_conn = conn->dd_data;
	old_state_change = tcp_conn->old_state_change;
1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040

	read_unlock(&sk->sk_callback_lock);

	old_state_change(sk);
}

/**
 * iscsi_write_space - Called when more output buffer space is available
 * @sk: socket space is available for
 **/
static void
iscsi_write_space(struct sock *sk)
{
	struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
1041 1042 1043
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	tcp_conn->old_write_space(sk);
1044
	debug_tcp("iscsi_write_space: cid %d\n", conn->id);
1045
	scsi_queue_work(conn->session->host, &conn->xmitwork);
1046 1047 1048 1049 1050
}

static void
iscsi_conn_set_callbacks(struct iscsi_conn *conn)
{
1051 1052
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct sock *sk = tcp_conn->sock->sk;
1053 1054 1055 1056

	/* assign new callbacks */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = conn;
1057 1058 1059
	tcp_conn->old_data_ready = sk->sk_data_ready;
	tcp_conn->old_state_change = sk->sk_state_change;
	tcp_conn->old_write_space = sk->sk_write_space;
1060 1061 1062 1063 1064 1065 1066
	sk->sk_data_ready = iscsi_tcp_data_ready;
	sk->sk_state_change = iscsi_tcp_state_change;
	sk->sk_write_space = iscsi_write_space;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void
1067
iscsi_conn_restore_callbacks(struct iscsi_tcp_conn *tcp_conn)
1068
{
1069
	struct sock *sk = tcp_conn->sock->sk;
1070 1071 1072 1073

	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data    = NULL;
1074 1075 1076
	sk->sk_data_ready   = tcp_conn->old_data_ready;
	sk->sk_state_change = tcp_conn->old_state_change;
	sk->sk_write_space  = tcp_conn->old_write_space;
1077 1078 1079 1080 1081
	sk->sk_no_check	 = 0;
	write_unlock_bh(&sk->sk_callback_lock);
}

/**
1082 1083 1084 1085
 * iscsi_xmit - TCP transmit
 **/
static int
iscsi_xmit(struct iscsi_conn *conn)
1086
{
1087
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1088 1089 1090
	struct iscsi_segment *segment = &tcp_conn->out.segment;
	unsigned int consumed = 0;
	int rc = 0;
1091

1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107
	while (1) {
		rc = iscsi_tcp_xmit_segment(tcp_conn, segment);
		if (rc < 0)
			goto error;
		if (rc == 0)
			break;

		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			if (segment->done != NULL) {
				rc = segment->done(tcp_conn, segment);
				if (rc < 0)
					goto error;
			}
		}
1108 1109
	}

1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120
	debug_tcp("xmit %d bytes\n", consumed);

	conn->txdata_octets += consumed;
	return consumed;

error:
	/* Transmit error. We could initiate error recovery
	 * here. */
	debug_tcp("Error sending PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	return rc;
1121 1122 1123
}

/**
1124 1125
 * iscsi_tcp_xmit_qlen - return the number of bytes queued for xmit
 */
1126
static inline int
1127
iscsi_tcp_xmit_qlen(struct iscsi_conn *conn)
1128
{
1129 1130
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_segment *segment = &tcp_conn->out.segment;
1131

1132
	return segment->total_copied - segment->total_size;
1133 1134 1135
}

static inline int
1136
iscsi_tcp_flush(struct iscsi_conn *conn)
1137
{
1138 1139 1140 1141 1142
	int rc;

	while (iscsi_tcp_xmit_qlen(conn)) {
		rc = iscsi_xmit(conn);
		if (rc == 0)
1143
			return -EAGAIN;
1144 1145
		if (rc < 0)
			return rc;
1146
	}
1147

1148
	return 0;
1149 1150
}

1151 1152 1153 1154 1155 1156 1157
/*
 * This is called when we're done sending the header.
 * Simply copy the data_segment to the send segment, and return.
 */
static int
iscsi_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn,
			struct iscsi_segment *segment)
1158
{
1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248
	tcp_conn->out.segment = tcp_conn->out.data_segment;
	debug_tcp("Header done. Next segment size %u total_size %u\n",
		  tcp_conn->out.segment.size, tcp_conn->out.segment.total_size);
	return 0;
}

/*
 * Set up the output segment for sending a PDU header.
 *
 * @hdr must have ISCSI_DIGEST_SIZE spare bytes after @hdrlen when header
 * digests are enabled, because the digest is written right behind the
 * header.
 */
static void
iscsi_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr, size_t hdrlen)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;

	debug_tcp("%s(%p%s)\n", __FUNCTION__, tcp_conn,
			conn->hdrdgst_en? ", digest enabled" : "");

	/* Keep the header pointer around: the data-prep helpers read
	 * the payload length back from it. */
	tcp_conn->out.hdr = hdr;

	/* Start with an empty data segment; the caller fills it in
	 * afterwards via iscsi_tcp_send_data_prep() when there is
	 * a payload. */
	memset(&tcp_conn->out.data_segment, 0, sizeof(struct iscsi_segment));

	/* With header digests on, compute the CRC and append it to
	 * the header buffer, growing the amount to send. */
	if (conn->hdrdgst_en) {
		iscsi_tcp_dgst_header(&tcp_conn->tx_hash, hdr, hdrlen,
				      hdr + hdrlen);
		hdrlen += ISCSI_DIGEST_SIZE;
	}

	iscsi_segment_init_linear(&tcp_conn->out.segment, hdr, hdrlen,
				iscsi_tcp_send_hdr_done, NULL);
}

/*
 * Point the data segment at @len bytes of the scatterlist @sg (@count
 * entries), starting @offset bytes in. Padding and checksumming are
 * left to the iscsi_segment routines. Returns whatever
 * iscsi_segment_seek_sg() returns.
 */
static int
iscsi_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg,
			 unsigned int count, unsigned int offset,
			 unsigned int len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *tx_hash;
	unsigned int hdr_spec_len;

	debug_tcp("%s(%p, offset=%d, datalen=%d%s)\n", __FUNCTION__,
			tcp_conn, offset, len,
			conn->datadgst_en? ", digest enabled" : "");

	/* The length promised in the already prepared header must
	 * agree (after padding) with what we are about to send. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	/* Only hand over a hash when data digests are enabled. */
	tx_hash = conn->datadgst_en ? &tcp_conn->tx_hash : NULL;

	return iscsi_segment_seek_sg(&tcp_conn->out.data_segment,
				   sg, count, offset, len,
				   NULL, tx_hash);
}

/*
 * Point the data segment at the linear buffer @data of @len bytes.
 * The transmit hash is attached only when data digests are enabled.
 */
static void
iscsi_tcp_send_linear_data_prepare(struct iscsi_conn *conn, void *data,
				   size_t len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

	/* len is a size_t, so it needs %zu rather than %d */
	debug_tcp("%s(%p, datalen=%zu%s)\n", __FUNCTION__, tcp_conn, len,
		  conn->datadgst_en? ", digest enabled" : "");

	/* Make sure the datalen matches what the caller
	   said he would send. */
	hdr_spec_len = ntoh24(tcp_conn->out.hdr->dlength);
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
		tx_hash = &tcp_conn->tx_hash;

	iscsi_segment_init_linear(&tcp_conn->out.data_segment,
				data, len, NULL, tx_hash);
}

/**
 * iscsi_solicit_data_cont - initialize next Data-Out
 * @conn: iscsi connection
 * @ctask: scsi command task
 * @r2t: R2T info
 * @left: bytes left to transfer
 *
 * Notes:
 *	Initialize next Data-Out within this R2T sequence and continue
 *	to process next Scatter-Gather element(if any) of this SCSI command.
 *
 *	Called under connection lock.
 **/
1264
static int
1265
iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
1266
			struct iscsi_r2t_info *r2t)
1267 1268
{
	struct iscsi_data *hdr;
1269 1270 1271 1272 1273 1274
	int new_offset, left;

	BUG_ON(r2t->data_length - r2t->sent < 0);
	left = r2t->data_length - r2t->sent;
	if (left == 0)
		return 0;
1275

1276
	hdr = &r2t->dtask.hdr;
1277 1278 1279 1280 1281
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
1282 1283
	memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
	hdr->itt = ctask->hdr->itt;
1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295
	hdr->exp_statsn = r2t->exp_statsn;
	new_offset = r2t->data_offset + r2t->sent;
	hdr->offset = cpu_to_be32(new_offset);
	if (left > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
	} else {
		hton24(hdr->dlength, left);
		r2t->data_count = left;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}

1296 1297
	conn->dataout_pdus_cnt++;
	return 1;
1298 1299 1300
}

/**
1301
 * iscsi_tcp_ctask - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
1302 1303 1304 1305
 * @conn: iscsi connection
 * @ctask: scsi command task
 * @sc: scsi command
 **/
1306 1307
static int
iscsi_tcp_ctask_init(struct iscsi_cmd_task *ctask)
1308
{
1309
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1310 1311 1312
	struct iscsi_conn *conn = ctask->conn;
	struct scsi_cmnd *sc = ctask->sc;
	int err;
1313

1314
	BUG_ON(__kfifo_len(tcp_ctask->r2tqueue));
1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334
	tcp_ctask->sent = 0;
	tcp_ctask->exp_datasn = 0;

	/* Prepare PDU, optionally w/ immediate data */
	debug_scsi("ctask deq [cid %d itt 0x%x imm %d unsol %d]\n",
		    conn->id, ctask->itt, ctask->imm_count,
		    ctask->unsol_count);
	iscsi_tcp_send_hdr_prep(conn, ctask->hdr, ctask->hdr_len);

	if (!ctask->imm_count)
		return 0;

	/* If we have immediate data, attach a payload */
	err = iscsi_tcp_send_data_prep(conn, scsi_sglist(sc), scsi_sg_count(sc),
				       0, ctask->imm_count);
	if (err)
		return err;
	tcp_ctask->sent += ctask->imm_count;
	ctask->imm_count = 0;
	return 0;
1335 1336 1337
}

/**
1338
 * iscsi_tcp_mtask_xmit - xmit management(immediate) task
1339 1340 1341 1342 1343 1344 1345 1346 1347
 * @conn: iscsi connection
 * @mtask: task management task
 *
 * Notes:
 *	The function can return -EAGAIN in which case caller must
 *	call it again later, or recover. '0' return code means successful
 *	xmit.
 **/
static int
1348
iscsi_tcp_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
1349
{
1350
	int rc;
1351

1352 1353 1354 1355
	/* Flush any pending data first. */
	rc = iscsi_tcp_flush(conn);
	if (rc < 0)
		return rc;
1356

A
Al Viro 已提交
1357
	if (mtask->hdr->itt == RESERVED_ITT) {
1358 1359 1360
		struct iscsi_session *session = conn->session;

		spin_lock_bh(&session->lock);
1361
		iscsi_free_mgmt_task(conn, mtask);
1362 1363
		spin_unlock_bh(&session->lock);
	}
1364

1365 1366 1367
	return 0;
}

1368 1369 1370 1371 1372 1373 1374 1375 1376
/*
 * iscsi_tcp_ctask_xmit - xmit normal PDU task
 * @conn: iscsi connection
 * @ctask: iscsi command task
 *
 * We're expected to return 0 when everything was transmitted succesfully,
 * -EAGAIN if there's still data in the queue, or != 0 for any other kind
 * of error.
 */
1377
static int
1378
iscsi_tcp_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
1379
{
1380
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1381
	struct scsi_cmnd *sc = ctask->sc;
1382
	int rc = 0;
1383

1384 1385 1386 1387
flush:
	/* Flush any pending data first. */
	rc = iscsi_tcp_flush(conn);
	if (rc < 0)
1388 1389
		return rc;

1390 1391 1392
	/* Are we done already? */
	if (sc->sc_data_direction != DMA_TO_DEVICE)
		return 0;
1393

1394 1395
	if (ctask->unsol_count != 0) {
		struct iscsi_data *hdr = &tcp_ctask->unsol_dtask.hdr;
1396

1397 1398 1399 1400 1401 1402
		/* Prepare a header for the unsolicited PDU.
		 * The amount of data we want to send will be
		 * in ctask->data_count.
		 * FIXME: return the data count instead.
		 */
		iscsi_prep_unsolicit_data_pdu(ctask, hdr);
1403

1404 1405
		debug_tcp("unsol dout [itt 0x%x doff %d dlen %d]\n",
				ctask->itt, tcp_ctask->sent, ctask->data_count);
1406

1407 1408 1409 1410 1411
		iscsi_tcp_send_hdr_prep(conn, hdr, sizeof(*hdr));
		rc = iscsi_tcp_send_data_prep(conn, scsi_sglist(sc),
					      scsi_sg_count(sc),
					      tcp_ctask->sent,
					      ctask->data_count);
1412
		if (rc)
1413 1414 1415 1416 1417 1418 1419
			goto fail;
		tcp_ctask->sent += ctask->data_count;
		ctask->unsol_count -= ctask->data_count;
		goto flush;
	} else {
		struct iscsi_session *session = conn->session;
		struct iscsi_r2t_info *r2t;
1420

1421
		/* All unsolicited PDUs sent. Check for solicited PDUs.
1422
		 */
1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433
		spin_lock_bh(&session->lock);
		r2t = tcp_ctask->r2t;
		if (r2t != NULL) {
			/* Continue with this R2T? */
			if (!iscsi_solicit_data_cont(conn, ctask, r2t)) {
				debug_scsi("  done with r2t %p\n", r2t);

				__kfifo_put(tcp_ctask->r2tpool.queue,
					    (void*)&r2t, sizeof(void*));
				tcp_ctask->r2t = r2t = NULL;
			}
1434 1435
		}

1436
		if (r2t == NULL) {
1437 1438
			__kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t,
				    sizeof(void*));
1439
			r2t = tcp_ctask->r2t;
1440
		}
1441
		spin_unlock_bh(&session->lock);
1442

1443 1444 1445 1446
		/* Waiting for more R2Ts to arrive. */
		if (r2t == NULL) {
			debug_tcp("no R2Ts yet\n");
			return 0;
1447 1448
		}

1449 1450 1451
		debug_scsi("sol dout %p [dsn %d itt 0x%x doff %d dlen %d]\n",
			r2t, r2t->solicit_datasn - 1, ctask->itt,
			r2t->data_offset + r2t->sent, r2t->data_count);
1452

1453 1454
		iscsi_tcp_send_hdr_prep(conn, &r2t->dtask.hdr,
					sizeof(struct iscsi_hdr));
1455

1456 1457 1458 1459
		rc = iscsi_tcp_send_data_prep(conn, scsi_sglist(sc),
					      scsi_sg_count(sc),
					      r2t->data_offset + r2t->sent,
					      r2t->data_count);
1460
		if (rc)
1461 1462 1463 1464
			goto fail;
		tcp_ctask->sent += r2t->data_count;
		r2t->sent += r2t->data_count;
		goto flush;
1465 1466
	}
	return 0;
1467 1468 1469
fail:
	iscsi_conn_failure(conn, rc);
	return -EIO;
1470 1471
}

1472 1473
/*
 * Create a connection: set up the generic iSCSI connection, allocate
 * the TCP-specific part and one crc32c hash each for transmit and
 * receive. Returns the class connection, or NULL on any failure (with
 * everything allocated here released again).
 */
static struct iscsi_cls_conn *
iscsi_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
{
	struct iscsi_cls_conn *cls_conn;
	struct iscsi_tcp_conn *tcp_conn;
	struct iscsi_conn *conn;

	cls_conn = iscsi_conn_setup(cls_session, conn_idx);
	if (!cls_conn)
		return NULL;

	conn = cls_conn->dd_data;
	/*
	 * due to strange issues with iser these are not set
	 * in iscsi_conn_setup
	 */
	conn->max_recv_dlength = ISCSI_DEF_MAX_RECV_SEG_LEN;

	tcp_conn = kzalloc(sizeof(*tcp_conn), GFP_KERNEL);
	if (!tcp_conn)
		goto teardown_conn;

	tcp_conn->iscsi_conn = conn;
	conn->dd_data = tcp_conn;

	/* transmit-side digest hash */
	tcp_conn->tx_hash.flags = 0;
	tcp_conn->tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						  CRYPTO_ALG_ASYNC);
	if (IS_ERR(tcp_conn->tx_hash.tfm)) {
		printk(KERN_ERR "Could not create connection due to crc32c "
		       "loading error %ld. Make sure the crc32c module is "
		       "built as a module or into the kernel\n",
			PTR_ERR(tcp_conn->tx_hash.tfm));
		goto release_tcp_conn;
	}

	/* receive-side digest hash */
	tcp_conn->rx_hash.flags = 0;
	tcp_conn->rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						  CRYPTO_ALG_ASYNC);
	if (!IS_ERR(tcp_conn->rx_hash.tfm))
		return cls_conn;

	printk(KERN_ERR "Could not create connection due to crc32c "
	       "loading error %ld. Make sure the crc32c module is "
	       "built as a module or into the kernel\n",
		PTR_ERR(tcp_conn->rx_hash.tfm));

	crypto_free_hash(tcp_conn->tx_hash.tfm);
release_tcp_conn:
	kfree(tcp_conn);
teardown_conn:
	iscsi_conn_teardown(cls_conn);
	return NULL;
}

1529 1530 1531
static void
iscsi_tcp_release_conn(struct iscsi_conn *conn)
{
1532
	struct iscsi_session *session = conn->session;
1533
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1534
	struct socket *sock = tcp_conn->sock;
1535

1536
	if (!sock)
1537 1538
		return;

1539
	sock_hold(sock->sk);
1540
	iscsi_conn_restore_callbacks(tcp_conn);
1541
	sock_put(sock->sk);
1542

1543
	spin_lock_bh(&session->lock);
1544 1545
	tcp_conn->sock = NULL;
	conn->recv_lock = NULL;
1546 1547
	spin_unlock_bh(&session->lock);
	sockfd_put(sock);
1548 1549
}

1550
static void
1551
iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
1552
{
1553 1554
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1555

1556
	iscsi_tcp_release_conn(conn);
1557
	iscsi_conn_teardown(cls_conn);
1558

P
Pete Wyckoff 已提交
1559 1560 1561 1562
	if (tcp_conn->tx_hash.tfm)
		crypto_free_hash(tcp_conn->tx_hash.tfm);
	if (tcp_conn->rx_hash.tfm)
		crypto_free_hash(tcp_conn->rx_hash.tfm);
1563

1564 1565
	kfree(tcp_conn);
}
1566

1567 1568 1569 1570 1571 1572 1573 1574 1575
/*
 * Stop a connection: run the generic stop handling with @flag, then
 * release the connection's socket.
 */
static void
iscsi_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
	struct iscsi_conn *conn;

	conn = cls_conn->dd_data;
	iscsi_conn_stop(cls_conn, flag);
	iscsi_tcp_release_conn(conn);
}

1576 1577 1578 1579 1580 1581 1582 1583 1584 1585
static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
			      char *buf, int *port,
			      int (*getname)(struct socket *, struct sockaddr *,
					int *addrlen))
{
	struct sockaddr_storage *addr;
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	int rc = 0, len;

1586
	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615
	if (!addr)
		return -ENOMEM;

	if (getname(sock, (struct sockaddr *) addr, &len)) {
		rc = -ENODEV;
		goto free_addr;
	}

	switch (addr->ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
		*port = be16_to_cpu(sin->sin_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)addr;
		spin_lock_bh(&conn->session->lock);
		sprintf(buf, NIP6_FMT, NIP6(sin6->sin6_addr));
		*port = be16_to_cpu(sin6->sin6_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	}
free_addr:
	kfree(addr);
	return rc;
}

1616 1617
/*
 * Bind a connection to the socket whose file descriptor is passed in
 * @transport_eph: record the peer and local addresses, do the generic
 * bind, set socket parameters and take over the socket callbacks.
 *
 * Returns 0 on success, -EEXIST if the descriptor lookup fails, or the
 * error of the step that failed (the socket reference is dropped again).
 */
static int
iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
		    struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
		    int is_leading)
{
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct socket *sock;
	struct sock *sk;
	int err;

	/* lookup for existing socket */
	sock = sockfd_lookup((int)transport_eph, &err);
	if (!sock) {
		printk(KERN_ERR "iscsi_tcp: sockfd_lookup failed %d\n", err);
		return -EEXIST;
	}

	/*
	 * copy these values now because if we drop the session
	 * userspace may still want to query the values since we will
	 * be using them for the reconnect
	 */
	err = iscsi_tcp_get_addr(conn, sock, conn->portal_address,
				 &conn->portal_port, kernel_getpeername);
	if (!err)
		err = iscsi_tcp_get_addr(conn, sock, conn->local_address,
					 &conn->local_port,
					 kernel_getsockname);
	if (!err)
		err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
	if (err) {
		sockfd_put(sock);
		return err;
	}

	/* bind iSCSI connection and socket */
	tcp_conn->sock = sock;

	/* setup Socket parameters */
	sk = sock->sk;
	sk->sk_allocation = GFP_ATOMIC;
	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
	sk->sk_reuse = 1;

	/* FIXME: disable Nagle's algorithm */

	/*
	 * Intercept TCP callbacks for sendfile like receive
	 * processing.
	 */
	conn->recv_lock = &sk->sk_callback_lock;
	iscsi_conn_set_callbacks(conn);
	tcp_conn->sendpage = tcp_conn->sock->ops->sendpage;

	/* set receive state machine into initial state */
	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

1681
/* called with host lock */
M
Mike Christie 已提交
1682
static void
1683
iscsi_tcp_mtask_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
1684
{
1685 1686 1687 1688 1689 1690 1691 1692 1693
	debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id, mtask->itt);

	/* Prepare PDU, optionally w/ immediate data */
	iscsi_tcp_send_hdr_prep(conn, mtask->hdr, sizeof(*mtask->hdr));

	/* If we have immediate data, attach a payload */
	if (mtask->data_count)
		iscsi_tcp_send_linear_data_prepare(conn, mtask->data,
						   mtask->data_count);
1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706
}

static int
iscsi_r2tpool_alloc(struct iscsi_session *session)
{
	int i;
	int cmd_i;

	/*
	 * initialize per-task: R2T pool and xmit queue
	 */
	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
	        struct iscsi_cmd_task *ctask = session->cmds[cmd_i];
1707
		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1708 1709 1710 1711 1712 1713 1714 1715

		/*
		 * pre-allocated x4 as much r2ts to handle race when
		 * target acks DataOut faster than we data_xmit() queues
		 * could replenish r2tqueue.
		 */

		/* R2T pool */
1716
		if (iscsi_pool_init(&tcp_ctask->r2tpool, session->max_r2t * 4, NULL,
1717
				    sizeof(struct iscsi_r2t_info))) {
1718 1719 1720 1721
			goto r2t_alloc_fail;
		}

		/* R2T xmit queue */
1722
		tcp_ctask->r2tqueue = kfifo_alloc(
1723
		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL);
1724
		if (tcp_ctask->r2tqueue == ERR_PTR(-ENOMEM)) {
1725
			iscsi_pool_free(&tcp_ctask->r2tpool);
1726 1727 1728 1729 1730 1731 1732 1733
			goto r2t_alloc_fail;
		}
	}

	return 0;

r2t_alloc_fail:
	for (i = 0; i < cmd_i; i++) {
1734 1735 1736 1737
		struct iscsi_cmd_task *ctask = session->cmds[i];
		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;

		kfifo_free(tcp_ctask->r2tqueue);
1738
		iscsi_pool_free(&tcp_ctask->r2tpool);
1739 1740 1741 1742 1743 1744 1745 1746 1747 1748
	}
	return -ENOMEM;
}

static void
iscsi_r2tpool_free(struct iscsi_session *session)
{
	int i;

	for (i = 0; i < session->cmds_max; i++) {
1749 1750
		struct iscsi_cmd_task *ctask = session->cmds[i];
		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
1751

1752
		kfifo_free(tcp_ctask->r2tqueue);
1753
		iscsi_pool_free(&tcp_ctask->r2tpool);
1754 1755 1756 1757
	}
}

static int
1758
iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
1759
		     char *buf, int buflen)
1760
{
1761
	struct iscsi_conn *conn = cls_conn->dd_data;
1762
	struct iscsi_session *session = conn->session;
1763
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1764
	int value;
1765 1766 1767

	switch(param) {
	case ISCSI_PARAM_HDRDGST_EN:
1768
		iscsi_set_param(cls_conn, param, buf, buflen);
1769 1770
		break;
	case ISCSI_PARAM_DATADGST_EN:
1771
		iscsi_set_param(cls_conn, param, buf, buflen);
1772 1773
		tcp_conn->sendpage = conn->datadgst_en ?
			sock_no_sendpage : tcp_conn->sock->ops->sendpage;
1774 1775
		break;
	case ISCSI_PARAM_MAX_R2T:
1776
		sscanf(buf, "%d", &value);
1777 1778 1779
		if (value <= 0 || !is_power_of_2(value))
			return -EINVAL;
		if (session->max_r2t == value)
1780 1781
			break;
		iscsi_r2tpool_free(session);
1782
		iscsi_set_param(cls_conn, param, buf, buflen);
1783 1784 1785 1786
		if (iscsi_r2tpool_alloc(session))
			return -ENOMEM;
		break;
	default:
1787
		return iscsi_set_param(cls_conn, param, buf, buflen);
1788 1789 1790 1791 1792 1793
	}

	return 0;
}

static int
1794 1795
iscsi_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
			 enum iscsi_param param, char *buf)
1796
{
1797
	struct iscsi_conn *conn = cls_conn->dd_data;
1798
	int len;
1799 1800

	switch(param) {
1801
	case ISCSI_PARAM_CONN_PORT:
1802 1803 1804
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%hu\n", conn->portal_port);
		spin_unlock_bh(&conn->session->lock);
1805
		break;
1806
	case ISCSI_PARAM_CONN_ADDRESS:
1807 1808 1809
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%s\n", conn->portal_address);
		spin_unlock_bh(&conn->session->lock);
1810 1811
		break;
	default:
1812
		return iscsi_conn_get_param(cls_conn, param, buf);
1813 1814 1815 1816 1817
	}

	return len;
}

1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840
/*
 * Format a host parameter into @buf. The IP address is taken from the
 * leading connection's local address (-ENODEV if the session has no
 * leading connection); all other parameters are delegated to
 * iscsi_host_get_param().
 */
static int
iscsi_tcp_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param,
			 char *buf)
{
	struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
	int len = -ENODEV;

	if (param != ISCSI_HOST_PARAM_IPADDRESS)
		return iscsi_host_get_param(shost, param, buf);

	spin_lock_bh(&session->lock);
	if (session->leadconn)
		len = sprintf(buf, "%s\n",
			     session->leadconn->local_address);
	spin_unlock_bh(&session->lock);

	return len;
}

1841
static void
1842
iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
1843
{
1844
	struct iscsi_conn *conn = cls_conn->dd_data;
1845
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857

	stats->txdata_octets = conn->txdata_octets;
	stats->rxdata_octets = conn->rxdata_octets;
	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
	stats->dataout_pdus = conn->dataout_pdus_cnt;
	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
	stats->datain_pdus = conn->datain_pdus_cnt;
	stats->r2t_pdus = conn->r2t_pdus_cnt;
	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
	stats->custom_length = 3;
	strcpy(stats->custom[0].desc, "tx_sendpage_failures");
1858
	stats->custom[0].value = tcp_conn->sendpage_failures_cnt;
1859
	strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
1860
	stats->custom[1].value = tcp_conn->discontiguous_hdr_cnt;
1861 1862 1863 1864
	strcpy(stats->custom[2].desc, "eh_abort_cnt");
	stats->custom[2].value = conn->eh_abort_cnt;
}

1865 1866 1867
/*
 * Create a session: set up the generic session, point every command
 * and management task at the header storage inside its TCP-specific
 * data, and allocate the per-task R2T pools. The host number is
 * returned through @hostno. Returns NULL on failure.
 */
static struct iscsi_cls_session *
iscsi_tcp_session_create(struct iscsi_transport *iscsit,
			 struct scsi_transport_template *scsit,
			 uint16_t cmds_max, uint16_t qdepth,
			 uint32_t initial_cmdsn, uint32_t *hostno)
{
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
	uint32_t hn;
	int idx;

	cls_session = iscsi_session_setup(iscsit, scsit, cmds_max, qdepth,
					 sizeof(struct iscsi_tcp_cmd_task),
					 sizeof(struct iscsi_tcp_mgmt_task),
					 initial_cmdsn, &hn);
	if (!cls_session)
		return NULL;
	*hostno = hn;

	session = class_to_transport_session(cls_session);

	/* command tasks: header buffer, minus room kept for the digest */
	for (idx = 0; idx < session->cmds_max; idx++) {
		struct iscsi_cmd_task *ctask = session->cmds[idx];
		struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;

		ctask->hdr_max = sizeof(tcp_ctask->hdr) - ISCSI_DIGEST_SIZE;
		ctask->hdr = &tcp_ctask->hdr.cmd_hdr;
	}

	/* management tasks */
	for (idx = 0; idx < session->mgmtpool_max; idx++) {
		struct iscsi_mgmt_task *mtask = session->mgmt_cmds[idx];
		struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data;

		mtask->hdr = (struct iscsi_hdr *) &tcp_mtask->hdr;
	}

	if (iscsi_r2tpool_alloc(session)) {
		iscsi_session_teardown(cls_session);
		return NULL;
	}

	return cls_session;
}

/* Free the per-task R2T pools, then tear down the generic session. */
static void iscsi_tcp_session_destroy(struct iscsi_cls_session *cls_session)
{
	struct iscsi_session *session;

	session = class_to_transport_session(cls_session);
	iscsi_r2tpool_free(session);
	iscsi_session_teardown(cls_session);
}

1916 1917
static int iscsi_tcp_slave_configure(struct scsi_device *sdev)
{
1918
	blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_ANY);
1919 1920 1921 1922
	blk_queue_dma_alignment(sdev->request_queue, 0);
	return 0;
}

1923
static struct scsi_host_template iscsi_sht = {
1924
	.module			= THIS_MODULE,
1925
	.name			= "iSCSI Initiator over TCP/IP",
1926 1927
	.queuecommand           = iscsi_queuecommand,
	.change_queue_depth	= iscsi_change_queue_depth,
1928
	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
1929
	.sg_tablesize		= 4096,
1930
	.max_sectors		= 0xFFFF,
1931 1932
	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
	.eh_abort_handler       = iscsi_eh_abort,
1933
	.eh_device_reset_handler= iscsi_eh_device_reset,
1934 1935
	.eh_host_reset_handler	= iscsi_eh_host_reset,
	.use_clustering         = DISABLE_CLUSTERING,
1936
	.slave_configure        = iscsi_tcp_slave_configure,
1937 1938 1939 1940
	.proc_name		= "iscsi_tcp",
	.this_id		= -1,
};

1941 1942 1943 1944 1945
static struct iscsi_transport iscsi_tcp_transport = {
	.owner			= THIS_MODULE,
	.name			= "tcp",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
				  | CAP_DATADGST,
1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958
	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
				  ISCSI_MAX_XMIT_DLENGTH |
				  ISCSI_HDRDGST_EN |
				  ISCSI_DATADGST_EN |
				  ISCSI_INITIAL_R2T_EN |
				  ISCSI_MAX_R2T |
				  ISCSI_IMM_DATA_EN |
				  ISCSI_FIRST_BURST |
				  ISCSI_MAX_BURST |
				  ISCSI_PDU_INORDER_EN |
				  ISCSI_DATASEQ_INORDER_EN |
				  ISCSI_ERL |
				  ISCSI_CONN_PORT |
1959
				  ISCSI_CONN_ADDRESS |
1960 1961 1962
				  ISCSI_EXP_STATSN |
				  ISCSI_PERSISTENT_PORT |
				  ISCSI_PERSISTENT_ADDRESS |
1963 1964
				  ISCSI_TARGET_NAME | ISCSI_TPGT |
				  ISCSI_USERNAME | ISCSI_PASSWORD |
1965
				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
1966 1967 1968
				  ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
				  ISCSI_LU_RESET_TMO |
				  ISCSI_PING_TMO | ISCSI_RECV_TMO,
1969
	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
1970 1971
				  ISCSI_HOST_INITIATOR_NAME |
				  ISCSI_HOST_NETDEV_NAME,
1972
	.host_template		= &iscsi_sht,
1973
	.conndata_size		= sizeof(struct iscsi_conn),
1974
	.max_conn		= 1,
1975
	.max_cmd_len		= 16,
1976 1977 1978 1979 1980 1981 1982
	/* session management */
	.create_session		= iscsi_tcp_session_create,
	.destroy_session	= iscsi_tcp_session_destroy,
	/* connection management */
	.create_conn		= iscsi_tcp_conn_create,
	.bind_conn		= iscsi_tcp_conn_bind,
	.destroy_conn		= iscsi_tcp_conn_destroy,
1983
	.set_param		= iscsi_conn_set_param,
1984
	.get_conn_param		= iscsi_tcp_conn_get_param,
1985
	.get_session_param	= iscsi_session_get_param,
1986
	.start_conn		= iscsi_conn_start,
1987
	.stop_conn		= iscsi_tcp_conn_stop,
1988
	/* iscsi host params */
1989
	.get_host_param		= iscsi_tcp_host_get_param,
1990
	.set_host_param		= iscsi_host_set_param,
1991
	/* IO */
1992 1993
	.send_pdu		= iscsi_conn_send_pdu,
	.get_stats		= iscsi_conn_get_stats,
1994 1995
	.init_cmd_task		= iscsi_tcp_ctask_init,
	.init_mgmt_task		= iscsi_tcp_mtask_init,
1996 1997 1998 1999
	.xmit_cmd_task		= iscsi_tcp_ctask_xmit,
	.xmit_mgmt_task		= iscsi_tcp_mtask_xmit,
	.cleanup_cmd_task	= iscsi_tcp_cleanup_ctask,
	/* recovery */
M
Mike Christie 已提交
2000
	.session_recovery_timedout = iscsi_session_recovery_timedout,
2001 2002 2003 2004 2005 2006
};

/*
 * Module init: validate iscsi_max_lun, record it in the transport and
 * register the transport. Returns 0, -EINVAL for a max_lun below 1, or
 * -ENODEV if registration fails.
 */
static int __init
iscsi_tcp_init(void)
{
	if (iscsi_max_lun < 1) {
		printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
		       iscsi_max_lun);
		return -EINVAL;
	}
	iscsi_tcp_transport.max_lun = iscsi_max_lun;

	return iscsi_register_transport(&iscsi_tcp_transport) ? 0 : -ENODEV;
}

/* Module exit: unregister the transport registered by iscsi_tcp_init(). */
static void __exit
iscsi_tcp_exit(void)
{
	iscsi_unregister_transport(&iscsi_tcp_transport);
}

module_init(iscsi_tcp_init);
module_exit(iscsi_tcp_exit);