/*
 * iSCSI Initiator over TCP/IP Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
 * Copyright (C) 2005 - 2006 Mike Christie
 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
 * maintained by open-iscsi@googlegroups.com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * See the file COPYING included with this distribution for more details.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 */

#include <linux/types.h>
#include <linux/inet.h>
M
Mike Christie 已提交
31
#include <linux/file.h>
32 33 34 35 36 37 38
#include <linux/blkdev.h>
#include <linux/crypto.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
#include <net/tcp.h>
#include <scsi/scsi_cmnd.h>
39
#include <scsi/scsi_device.h>
40 41 42 43 44 45
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include "iscsi_tcp.h"

46 47
/* Module metadata: license, description and authors. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("iSCSI/TCP data-path");
MODULE_AUTHOR("Mike Christie <michaelc@cs.wisc.edu>, "
	      "Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");

52 53 54
/*
 * File-scope state. The transport template pointer is filled in by
 * iscsi_sw_tcp_init(); the host template and the iSCSI transport are
 * declared here and defined further down in this file.
 */
static struct scsi_transport_template *iscsi_sw_tcp_scsi_transport;
static struct scsi_host_template iscsi_sw_tcp_sht;
static struct iscsi_transport iscsi_sw_tcp_transport;
55

56 57 58
/*
 * "max_lun" module parameter (read-only via sysfs). Copied into
 * shost->max_lun at session creation; iscsi_sw_tcp_init() rejects values
 * below 1.
 */
static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
/*
 * "debug_iscsi_tcp" module parameter (world-readable, owner-writable).
 * A non-zero value enables the ISCSI_SW_TCP_DBG() messages.
 */
static int iscsi_sw_tcp_dbg;
module_param_named(debug_iscsi_tcp, iscsi_sw_tcp_dbg, int,
		   S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(debug_iscsi_tcp, "Turn on debugging for iscsi_tcp module "
		 "Set to 1 to turn on, and zero to turn off. Default is off.");

/*
 * ISCSI_SW_TCP_DBG - log a KERN_INFO message for @_conn, prefixed with the
 * name of the calling function, when iscsi_sw_tcp_dbg is non-zero.
 *
 * The expansion intentionally carries no trailing semicolon: the caller's
 * own ';' completes the do/while, so the macro behaves as exactly one
 * statement and is safe in an unbraced if/else.
 */
#define ISCSI_SW_TCP_DBG(_conn, dbg_fmt, arg...)		\
	do {							\
		if (iscsi_sw_tcp_dbg)				\
			iscsi_conn_printk(KERN_INFO, _conn,	\
					     "%s " dbg_fmt,	\
					     __func__, ##arg);	\
	} while (0)


O
Olaf Kirch 已提交
74
/**
 * iscsi_sw_tcp_recv - hand the contents of an skb to iscsi_tcp_recv_skb()
 * @rd_desc: read descriptor; arg.data holds the iscsi_conn
 * @skb: socket buffer
 * @offset: offset in skb at which to start
 * @len: skb->len - offset (not used by this function)
 *
 * Passed to tcp_read_sock() by iscsi_sw_tcp_data_ready(). Keeps calling
 * iscsi_tcp_recv_skb() until it consumes nothing or reports
 * ISCSI_TCP_SKB_DONE, and returns the total number of bytes consumed.
 */
static int iscsi_sw_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
			     unsigned int offset, size_t len)
{
	struct iscsi_conn *conn = rd_desc->arg.data;
	unsigned int total = 0;
	int status;

	ISCSI_SW_TCP_DBG(conn, "in %d bytes\n", skb->len - offset);

	for (;;) {
		unsigned int n;

		status = 0;
		n = iscsi_tcp_recv_skb(conn, skb, offset, 0, &status);
		offset += n;
		total += n;

		if (n == 0 || status == ISCSI_TCP_SKB_DONE)
			break;
	}

	/* offset has been advanced, so this logs what is left in the skb */
	ISCSI_SW_TCP_DBG(conn, "read %d bytes status %d\n",
			 skb->len - offset, status);
	return total;
}

102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
/**
 * iscsi_sw_sk_state_check - check socket state
 * @sk: socket
 *
 * A socket in CLOSE or CLOSE_WAIT is only reported as reset once no
 * receive memory is accounted to it any more, so that pending data can
 * still be drained before the connection is failed.
 *
 * Returns -ECONNRESET in that case, 0 otherwise.
 */
static inline int iscsi_sw_sk_state_check(struct sock *sk)
{
	int closing = sk->sk_state == TCP_CLOSE_WAIT ||
		      sk->sk_state == TCP_CLOSE;

	if (!closing)
		return 0;

	/* data still pending: keep the connection alive for now */
	if (atomic_read(&sk->sk_rmem_alloc))
		return 0;

	return -ECONNRESET;
}

120
static void iscsi_sw_tcp_data_ready(struct sock *sk, int flag)
121 122
{
	struct iscsi_conn *conn = sk->sk_user_data;
O
Olaf Kirch 已提交
123
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
124 125 126 127
	read_descriptor_t rd_desc;

	read_lock(&sk->sk_callback_lock);

128
	/*
O
Olaf Kirch 已提交
129
	 * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
130
	 * We set count to 1 because we want the network layer to
O
Olaf Kirch 已提交
131
	 * hand us all the skbs that are available. iscsi_tcp_recv
132 133
	 * handled pdus that cross buffers or pdus that still need data.
	 */
134
	rd_desc.arg.data = conn;
135
	rd_desc.count = 1;
136
	tcp_read_sock(sk, &rd_desc, iscsi_sw_tcp_recv);
137

138 139 140 141 142 143
	if (iscsi_sw_sk_state_check(sk) < 0) {
		ISCSI_SW_TCP_DBG(conn, "iscsi_tcp_data_ready: "
				 "TCP_CLOSE|TCP_CLOSE_WAIT\n");
		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	}

144
	read_unlock(&sk->sk_callback_lock);
O
Olaf Kirch 已提交
145 146 147

	/* If we had to (atomically) map a highmem page,
	 * unmap it now. */
148
	iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
149 150
}

151
static void iscsi_sw_tcp_state_change(struct sock *sk)
152
{
153
	struct iscsi_tcp_conn *tcp_conn;
154
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
155 156 157 158 159 160 161 162 163
	struct iscsi_conn *conn;
	struct iscsi_session *session;
	void (*old_state_change)(struct sock *);

	read_lock(&sk->sk_callback_lock);

	conn = (struct iscsi_conn*)sk->sk_user_data;
	session = conn->session;

164
	if (iscsi_sw_sk_state_check(sk) < 0) {
165 166
		ISCSI_SW_TCP_DBG(conn, "iscsi_tcp_state_change: "
				 "TCP_CLOSE|TCP_CLOSE_WAIT\n");
167 168 169
		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	}

170
	tcp_conn = conn->dd_data;
171 172
	tcp_sw_conn = tcp_conn->dd_data;
	old_state_change = tcp_sw_conn->old_state_change;
173 174 175 176 177 178 179 180 181 182

	read_unlock(&sk->sk_callback_lock);

	old_state_change(sk);
}

/**
 * iscsi_write_space - Called when more output buffer space is available
 * @sk: socket space is available for
 **/
183
static void iscsi_sw_tcp_write_space(struct sock *sk)
184 185
{
	struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
186
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
187
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
188

189
	tcp_sw_conn->old_write_space(sk);
190
	ISCSI_SW_TCP_DBG(conn, "iscsi_write_space\n");
191
	iscsi_conn_queue_work(conn);
192 193
}

194
static void iscsi_sw_tcp_conn_set_callbacks(struct iscsi_conn *conn)
195
{
196
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
197 198
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct sock *sk = tcp_sw_conn->sock->sk;
199 200 201 202

	/* assign new callbacks */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = conn;
203 204 205 206 207 208
	tcp_sw_conn->old_data_ready = sk->sk_data_ready;
	tcp_sw_conn->old_state_change = sk->sk_state_change;
	tcp_sw_conn->old_write_space = sk->sk_write_space;
	sk->sk_data_ready = iscsi_sw_tcp_data_ready;
	sk->sk_state_change = iscsi_sw_tcp_state_change;
	sk->sk_write_space = iscsi_sw_tcp_write_space;
209 210 211
	write_unlock_bh(&sk->sk_callback_lock);
}

212 213
static void
iscsi_sw_tcp_conn_restore_callbacks(struct iscsi_sw_tcp_conn *tcp_sw_conn)
214
{
215
	struct sock *sk = tcp_sw_conn->sock->sk;
216 217 218 219

	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data    = NULL;
220 221 222
	sk->sk_data_ready   = tcp_sw_conn->old_data_ready;
	sk->sk_state_change = tcp_sw_conn->old_state_change;
	sk->sk_write_space  = tcp_sw_conn->old_write_space;
223 224 225 226 227
	sk->sk_no_check	 = 0;
	write_unlock_bh(&sk->sk_callback_lock);
}

/**
228
 * iscsi_sw_tcp_xmit_segment - transmit segment
229
 * @tcp_conn: the iSCSI TCP connection
230 231 232 233 234 235 236 237 238 239
 * @segment: the buffer to transmnit
 *
 * This function transmits as much of the buffer as
 * the network layer will accept, and returns the number of
 * bytes transmitted.
 *
 * If CRC hashing is enabled, the function will compute the
 * hash as it goes. When the entire segment has been transmitted,
 * it will retrieve the hash value and send it as well.
 */
240
static int iscsi_sw_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
241 242
				     struct iscsi_segment *segment)
{
243
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
244 245 246 247
	struct socket *sk = tcp_sw_conn->sock;
	unsigned int copied = 0;
	int r = 0;

248
	while (!iscsi_tcp_segment_done(tcp_conn, segment, 0, r)) {
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
		struct scatterlist *sg;
		unsigned int offset, copy;
		int flags = 0;

		r = 0;
		offset = segment->copied;
		copy = segment->size - offset;

		if (segment->total_copied + segment->size < segment->total_size)
			flags |= MSG_MORE;

		/* Use sendpage if we can; else fall back to sendmsg */
		if (!segment->data) {
			sg = segment->sg;
			offset += segment->sg_offset + sg->offset;
			r = tcp_sw_conn->sendpage(sk, sg_page(sg), offset,
						  copy, flags);
		} else {
			struct msghdr msg = { .msg_flags = flags };
			struct kvec iov = {
				.iov_base = segment->data + offset,
				.iov_len = copy
			};

			r = kernel_sendmsg(sk, &msg, &iov, 1, copy);
		}

		if (r < 0) {
			iscsi_tcp_segment_unmap(segment);
			return r;
		}
		copied += r;
	}
	return copied;
}

/**
 * iscsi_sw_tcp_xmit - TCP transmit
287
 **/
288
static int iscsi_sw_tcp_xmit(struct iscsi_conn *conn)
289
{
290
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
291 292
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct iscsi_segment *segment = &tcp_sw_conn->out.segment;
293 294
	unsigned int consumed = 0;
	int rc = 0;
295

296
	while (1) {
297
		rc = iscsi_sw_tcp_xmit_segment(tcp_conn, segment);
298 299 300 301 302 303 304 305
		/*
		 * We may not have been able to send data because the conn
		 * is getting stopped. libiscsi will know so propogate err
		 * for it to do the right thing.
		 */
		if (rc == -EAGAIN)
			return rc;
		else if (rc < 0) {
306
			rc = ISCSI_ERR_XMIT_FAILED;
307
			goto error;
308
		} else if (rc == 0)
309 310 311 312 313 314 315
			break;

		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			if (segment->done != NULL) {
				rc = segment->done(tcp_conn, segment);
316
				if (rc != 0)
317 318 319
					goto error;
			}
		}
320 321
	}

322
	ISCSI_SW_TCP_DBG(conn, "xmit %d bytes\n", consumed);
323 324 325 326 327 328 329

	conn->txdata_octets += consumed;
	return consumed;

error:
	/* Transmit error. We could initiate error recovery
	 * here. */
330
	ISCSI_SW_TCP_DBG(conn, "Error sending PDU, errno=%d\n", rc);
331 332
	iscsi_conn_failure(conn, rc);
	return -EIO;
333 334 335
}

/**
336 337
 * iscsi_tcp_xmit_qlen - return the number of bytes queued for xmit
 */
338
static inline int iscsi_sw_tcp_xmit_qlen(struct iscsi_conn *conn)
339
{
340
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
341 342
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct iscsi_segment *segment = &tcp_sw_conn->out.segment;
343

344
	return segment->total_copied - segment->total_size;
345 346
}

347
static int iscsi_sw_tcp_pdu_xmit(struct iscsi_task *task)
348
{
349
	struct iscsi_conn *conn = task->conn;
350 351
	int rc;

352 353
	while (iscsi_sw_tcp_xmit_qlen(conn)) {
		rc = iscsi_sw_tcp_xmit(conn);
354
		if (rc == 0)
355
			return -EAGAIN;
356 357
		if (rc < 0)
			return rc;
358
	}
359

360
	return 0;
361 362
}

363 364 365 366
/*
 * This is called when we're done sending the header.
 * Simply copy the data_segment to the send segment, and return.
 */
367 368
static int iscsi_sw_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn,
				      struct iscsi_segment *segment)
369
{
370 371 372
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;

	tcp_sw_conn->out.segment = tcp_sw_conn->out.data_segment;
373 374 375 376
	ISCSI_SW_TCP_DBG(tcp_conn->iscsi_conn,
			 "Header done. Next segment size %u total_size %u\n",
			 tcp_sw_conn->out.segment.size,
			 tcp_sw_conn->out.segment.total_size);
377 378 379
	return 0;
}

380 381
/*
 * iscsi_sw_tcp_send_hdr_prep - set up the outgoing segment for a PDU header
 * @conn: iSCSI connection
 * @hdr: header buffer
 * @hdrlen: header length in bytes, without digest
 *
 * Clears the data segment, optionally appends the header digest, records
 * @hdr for the later data-length sanity check and initialises the outgoing
 * segment as a linear buffer whose completion hook is
 * iscsi_sw_tcp_send_hdr_done().
 */
static void iscsi_sw_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr,
				       size_t hdrlen)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *sw = tcp_conn->dd_data;

	ISCSI_SW_TCP_DBG(conn, "%s\n", conn->hdrdgst_en ?
			 "digest enabled" : "digest disabled");

	/* Start from an empty data segment; the caller fills it in through
	 * the send_data_prep helpers if the PDU carries a payload. */
	memset(&sw->out.data_segment, 0, sizeof(struct iscsi_segment));

	/* Keep the header pointer around: the data prep routines read the
	 * data length back out of it. */
	sw->out.hdr = hdr;

	/* With header digests on, the CRC is written directly behind the
	 * header in the same buffer, so the buffer must have
	 * ISCSI_DIGEST_SIZE bytes of room beyond hdrlen. */
	if (conn->hdrdgst_en) {
		iscsi_tcp_dgst_header(&sw->tx_hash, hdr, hdrlen,
				      hdr + hdrlen);
		hdrlen += ISCSI_DIGEST_SIZE;
	}

	iscsi_segment_init_linear(&sw->out.segment, hdr, hdrlen,
				  iscsi_sw_tcp_send_hdr_done, NULL);
}

/*
 * Prepare the send buffer for the payload data.
 * Padding and checksumming will all be taken care
 * of by the iscsi_segment routines.
 */
static int
420 421 422
iscsi_sw_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg,
			    unsigned int count, unsigned int offset,
			    unsigned int len)
423 424
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
425
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
426 427 428
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

429 430 431
	ISCSI_SW_TCP_DBG(conn, "offset=%d, datalen=%d %s\n", offset, len,
			 conn->datadgst_en ?
			 "digest enabled" : "digest disabled");
432 433 434

	/* Make sure the datalen matches what the caller
	   said he would send. */
435
	hdr_spec_len = ntoh24(tcp_sw_conn->out.hdr->dlength);
436 437 438
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
439
		tx_hash = &tcp_sw_conn->tx_hash;
440

441 442 443
	return iscsi_segment_seek_sg(&tcp_sw_conn->out.data_segment,
				     sg, count, offset, len,
				     NULL, tx_hash);
444 445 446
}

static void
447
iscsi_sw_tcp_send_linear_data_prep(struct iscsi_conn *conn, void *data,
448 449 450
				   size_t len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
451
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
452 453 454
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

455 456
	ISCSI_SW_TCP_DBG(conn, "datalen=%zd %s\n", len, conn->datadgst_en ?
			 "digest enabled" : "digest disabled");
457 458 459

	/* Make sure the datalen matches what the caller
	   said he would send. */
460
	hdr_spec_len = ntoh24(tcp_sw_conn->out.hdr->dlength);
461 462 463
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
464
		tx_hash = &tcp_sw_conn->tx_hash;
465

466
	iscsi_segment_init_linear(&tcp_sw_conn->out.data_segment,
467
				data, len, NULL, tx_hash);
468 469
}

470 471
static int iscsi_sw_tcp_pdu_init(struct iscsi_task *task,
				 unsigned int offset, unsigned int count)
472 473 474 475
{
	struct iscsi_conn *conn = task->conn;
	int err = 0;

476
	iscsi_sw_tcp_send_hdr_prep(conn, task->hdr, task->hdr_len);
477 478 479 480 481

	if (!count)
		return 0;

	if (!task->sc)
482
		iscsi_sw_tcp_send_linear_data_prep(conn, task->data, count);
483 484 485
	else {
		struct scsi_data_buffer *sdb = scsi_out(task->sc);

486 487 488
		err = iscsi_sw_tcp_send_data_prep(conn, sdb->table.sgl,
						  sdb->table.nents, offset,
						  count);
489 490 491
	}

	if (err) {
492
		/* got invalid offset/len */
493 494 495 496 497
		return -EIO;
	}
	return 0;
}

498
/*
 * iscsi_sw_tcp_pdu_alloc - point the task at its preallocated header buffer
 * @task: task to set up
 * @opcode: unused
 *
 * The header buffer sits directly behind the struct iscsi_tcp_task in the
 * task's dd_data area; hdr_max leaves ISCSI_DIGEST_SIZE bytes free at the
 * end of it. Always returns 0.
 */
static int iscsi_sw_tcp_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
{
	task->hdr = task->dd_data + sizeof(struct iscsi_tcp_task);
	task->hdr_max = sizeof(struct iscsi_sw_tcp_hdrbuf) - ISCSI_DIGEST_SIZE;

	return 0;
}

507
static struct iscsi_cls_conn *
508 509
iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session,
			 uint32_t conn_idx)
510
{
511 512 513
	struct iscsi_conn *conn;
	struct iscsi_cls_conn *cls_conn;
	struct iscsi_tcp_conn *tcp_conn;
514
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
515

516 517
	cls_conn = iscsi_tcp_conn_setup(cls_session, sizeof(*tcp_sw_conn),
					conn_idx);
518 519 520
	if (!cls_conn)
		return NULL;
	conn = cls_conn->dd_data;
521
	tcp_conn = conn->dd_data;
522
	tcp_sw_conn = tcp_conn->dd_data;
523

524 525 526 527
	tcp_sw_conn->tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						     CRYPTO_ALG_ASYNC);
	tcp_sw_conn->tx_hash.flags = 0;
	if (IS_ERR(tcp_sw_conn->tx_hash.tfm))
528
		goto free_conn;
529

530 531 532 533
	tcp_sw_conn->rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						     CRYPTO_ALG_ASYNC);
	tcp_sw_conn->rx_hash.flags = 0;
	if (IS_ERR(tcp_sw_conn->rx_hash.tfm))
534
		goto free_tx_tfm;
535
	tcp_conn->rx_hash = &tcp_sw_conn->rx_hash;
536

537
	return cls_conn;
538

539
free_tx_tfm:
540
	crypto_free_hash(tcp_sw_conn->tx_hash.tfm);
541
free_conn:
542 543 544 545 546
	iscsi_conn_printk(KERN_ERR, conn,
			  "Could not create connection due to crc32c "
			  "loading error. Make sure the crc32c "
			  "module is built as a module or into the "
			  "kernel\n");
547
	iscsi_tcp_conn_teardown(cls_conn);
548
	return NULL;
549 550
}

551
static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn)
552
{
553
	struct iscsi_session *session = conn->session;
554
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
555 556
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct socket *sock = tcp_sw_conn->sock;
557

558
	if (!sock)
559 560
		return;

561
	sock_hold(sock->sk);
562
	iscsi_sw_tcp_conn_restore_callbacks(tcp_sw_conn);
563
	sock_put(sock->sk);
564

565
	spin_lock_bh(&session->lock);
566
	tcp_sw_conn->sock = NULL;
567 568
	spin_unlock_bh(&session->lock);
	sockfd_put(sock);
569 570
}

571
static void iscsi_sw_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
572
{
573 574
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
575
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
576

577
	iscsi_sw_tcp_release_conn(conn);
578

579 580 581 582
	if (tcp_sw_conn->tx_hash.tfm)
		crypto_free_hash(tcp_sw_conn->tx_hash.tfm);
	if (tcp_sw_conn->rx_hash.tfm)
		crypto_free_hash(tcp_sw_conn->rx_hash.tfm);
583

584
	iscsi_tcp_conn_teardown(cls_conn);
585
}
586

587
static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
588 589
{
	struct iscsi_conn *conn = cls_conn->dd_data;
590
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
591
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
592 593

	/* userspace may have goofed up and not bound us */
594
	if (!tcp_sw_conn->sock)
595 596 597 598 599 600
		return;
	/*
	 * Make sure our recv side is stopped.
	 * Older tools called conn stop before ep_disconnect
	 * so IO could still be coming in.
	 */
601
	write_lock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock);
602
	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
603
	write_unlock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock);
604 605

	iscsi_conn_stop(cls_conn, flag);
606
	iscsi_sw_tcp_release_conn(conn);
607 608
}

609 610 611 612 613
static int iscsi_sw_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
				 char *buf, int *port,
				 int (*getname)(struct socket *,
						struct sockaddr *,
						int *addrlen))
614 615 616 617 618 619
{
	struct sockaddr_storage *addr;
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	int rc = 0, len;

620
	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
621 622 623 624 625 626 627 628 629 630 631 632
	if (!addr)
		return -ENOMEM;

	if (getname(sock, (struct sockaddr *) addr, &len)) {
		rc = -ENODEV;
		goto free_addr;
	}

	switch (addr->ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)addr;
		spin_lock_bh(&conn->session->lock);
H
Harvey Harrison 已提交
633
		sprintf(buf, "%pI4", &sin->sin_addr.s_addr);
634 635 636 637 638 639
		*port = be16_to_cpu(sin->sin_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)addr;
		spin_lock_bh(&conn->session->lock);
H
Harvey Harrison 已提交
640
		sprintf(buf, "%pI6", &sin6->sin6_addr);
641 642 643 644 645 646 647 648 649
		*port = be16_to_cpu(sin6->sin6_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	}
free_addr:
	kfree(addr);
	return rc;
}

650
static int
651 652 653
iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
		       struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
		       int is_leading)
654
{
655 656
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
	struct iscsi_host *ihost = shost_priv(shost);
657 658
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
659
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
660 661 662
	struct sock *sk;
	struct socket *sock;
	int err;
663

664
	/* lookup for existing socket */
665
	sock = sockfd_lookup((int)transport_eph, &err);
666
	if (!sock) {
667 668
		iscsi_conn_printk(KERN_ERR, conn,
				  "sockfd_lookup failed %d\n", err);
669
		return -EEXIST;
670
	}
671 672 673 674 675
	/*
	 * copy these values now because if we drop the session
	 * userspace may still want to query the values since we will
	 * be using them for the reconnect
	 */
676 677
	err = iscsi_sw_tcp_get_addr(conn, sock, conn->portal_address,
				    &conn->portal_port, kernel_getpeername);
678 679 680
	if (err)
		goto free_socket;

681 682
	err = iscsi_sw_tcp_get_addr(conn, sock, ihost->local_address,
				    &ihost->local_port, kernel_getsockname);
683 684
	if (err)
		goto free_socket;
685

686 687
	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
	if (err)
688
		goto free_socket;
689

690
	/* bind iSCSI connection and socket */
691
	tcp_sw_conn->sock = sock;
692

693 694 695 696 697
	/* setup Socket parameters */
	sk = sock->sk;
	sk->sk_reuse = 1;
	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
	sk->sk_allocation = GFP_ATOMIC;
698

699 700
	iscsi_sw_tcp_conn_set_callbacks(conn);
	tcp_sw_conn->sendpage = tcp_sw_conn->sock->ops->sendpage;
701 702 703
	/*
	 * set receive state machine into initial state
	 */
O
Olaf Kirch 已提交
704
	iscsi_tcp_hdr_recv_prep(tcp_conn);
705
	return 0;
706 707 708 709

free_socket:
	sockfd_put(sock);
	return err;
710 711
}

712 713 714
static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn,
				       enum iscsi_param param, char *buf,
				       int buflen)
715
{
716
	struct iscsi_conn *conn = cls_conn->dd_data;
717
	struct iscsi_session *session = conn->session;
718
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
719
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
720
	int value;
721 722 723

	switch(param) {
	case ISCSI_PARAM_HDRDGST_EN:
724
		iscsi_set_param(cls_conn, param, buf, buflen);
725 726
		break;
	case ISCSI_PARAM_DATADGST_EN:
727
		iscsi_set_param(cls_conn, param, buf, buflen);
728 729
		tcp_sw_conn->sendpage = conn->datadgst_en ?
			sock_no_sendpage : tcp_sw_conn->sock->ops->sendpage;
730 731
		break;
	case ISCSI_PARAM_MAX_R2T:
732
		sscanf(buf, "%d", &value);
733 734 735
		if (value <= 0 || !is_power_of_2(value))
			return -EINVAL;
		if (session->max_r2t == value)
736
			break;
737
		iscsi_tcp_r2tpool_free(session);
738
		iscsi_set_param(cls_conn, param, buf, buflen);
739
		if (iscsi_tcp_r2tpool_alloc(session))
740 741 742
			return -ENOMEM;
		break;
	default:
743
		return iscsi_set_param(cls_conn, param, buf, buflen);
744 745 746 747 748
	}

	return 0;
}

749 750
static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
				       enum iscsi_param param, char *buf)
751
{
752
	struct iscsi_conn *conn = cls_conn->dd_data;
753
	int len;
754 755

	switch(param) {
756
	case ISCSI_PARAM_CONN_PORT:
757 758 759
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%hu\n", conn->portal_port);
		spin_unlock_bh(&conn->session->lock);
760
		break;
761
	case ISCSI_PARAM_CONN_ADDRESS:
762 763 764
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%s\n", conn->portal_address);
		spin_unlock_bh(&conn->session->lock);
765 766
		break;
	default:
767
		return iscsi_conn_get_param(cls_conn, param, buf);
768 769 770 771 772
	}

	return len;
}

773
static void
774 775
iscsi_sw_tcp_conn_get_stats(struct iscsi_cls_conn *cls_conn,
			    struct iscsi_stats *stats)
776
{
777
	struct iscsi_conn *conn = cls_conn->dd_data;
778
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
779
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
780 781 782

	stats->custom_length = 3;
	strcpy(stats->custom[0].desc, "tx_sendpage_failures");
783
	stats->custom[0].value = tcp_sw_conn->sendpage_failures_cnt;
784
	strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
785
	stats->custom[1].value = tcp_sw_conn->discontiguous_hdr_cnt;
786 787
	strcpy(stats->custom[2].desc, "eh_abort_cnt");
	stats->custom[2].value = conn->eh_abort_cnt;
788 789

	iscsi_tcp_conn_get_stats(cls_conn, stats);
790 791
}

792
static struct iscsi_cls_session *
793
iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
794
			    uint16_t qdepth, uint32_t initial_cmdsn)
795
{
796 797
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
798
	struct Scsi_Host *shost;
799

800 801
	if (ep) {
		printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);
802 803 804
		return NULL;
	}

805
	shost = iscsi_host_alloc(&iscsi_sw_tcp_sht, 0, 1);
806
	if (!shost)
807
		return NULL;
808
	shost->transportt = iscsi_sw_tcp_scsi_transport;
809
	shost->cmd_per_lun = qdepth;
810 811 812
	shost->max_lun = iscsi_max_lun;
	shost->max_id = 0;
	shost->max_channel = 0;
813
	shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;
814

815
	if (iscsi_host_add(shost, NULL))
816 817
		goto free_host;

818 819 820 821
	cls_session = iscsi_session_setup(&iscsi_sw_tcp_transport, shost,
					  cmds_max,
					  sizeof(struct iscsi_tcp_task) +
					  sizeof(struct iscsi_sw_tcp_hdrbuf),
822
					  initial_cmdsn, 0);
823 824 825
	if (!cls_session)
		goto remove_host;
	session = cls_session->dd_data;
826

827
	shost->can_queue = session->scsi_cmds_max;
828
	if (iscsi_tcp_r2tpool_alloc(session))
829
		goto remove_session;
830 831
	return cls_session;

832
remove_session:
833
	iscsi_session_teardown(cls_session);
834
remove_host:
835
	iscsi_host_remove(shost);
836
free_host:
837
	iscsi_host_free(shost);
838 839 840
	return NULL;
}

841
static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
842
{
843 844
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);

845
	iscsi_tcp_r2tpool_free(cls_session->dd_data);
846
	iscsi_session_teardown(cls_session);
847

848 849
	iscsi_host_remove(shost);
	iscsi_host_free(shost);
850 851
}

P
Pete Wyckoff 已提交
852 853 854 855 856 857
static int iscsi_sw_tcp_slave_alloc(struct scsi_device *sdev)
{
	set_bit(QUEUE_FLAG_BIDI, &sdev->request_queue->queue_flags);
	return 0;
}

858
static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev)
859
{
860
	blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_ANY);
861 862 863 864
	blk_queue_dma_alignment(sdev->request_queue, 0);
	return 0;
}

865
static struct scsi_host_template iscsi_sw_tcp_sht = {
866
	.module			= THIS_MODULE,
867
	.name			= "iSCSI Initiator over TCP/IP",
868 869
	.queuecommand           = iscsi_queuecommand,
	.change_queue_depth	= iscsi_change_queue_depth,
870
	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
871
	.sg_tablesize		= 4096,
872
	.max_sectors		= 0xFFFF,
873 874
	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
	.eh_abort_handler       = iscsi_eh_abort,
875
	.eh_device_reset_handler= iscsi_eh_device_reset,
876
	.eh_target_reset_handler= iscsi_eh_target_reset,
877
	.use_clustering         = DISABLE_CLUSTERING,
P
Pete Wyckoff 已提交
878
	.slave_alloc            = iscsi_sw_tcp_slave_alloc,
879
	.slave_configure        = iscsi_sw_tcp_slave_configure,
880
	.target_alloc		= iscsi_target_alloc,
881 882 883 884
	.proc_name		= "iscsi_tcp",
	.this_id		= -1,
};

885
static struct iscsi_transport iscsi_sw_tcp_transport = {
886 887 888 889
	.owner			= THIS_MODULE,
	.name			= "tcp",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
				  | CAP_DATADGST,
890 891 892 893 894 895 896 897 898 899 900 901 902
	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
				  ISCSI_MAX_XMIT_DLENGTH |
				  ISCSI_HDRDGST_EN |
				  ISCSI_DATADGST_EN |
				  ISCSI_INITIAL_R2T_EN |
				  ISCSI_MAX_R2T |
				  ISCSI_IMM_DATA_EN |
				  ISCSI_FIRST_BURST |
				  ISCSI_MAX_BURST |
				  ISCSI_PDU_INORDER_EN |
				  ISCSI_DATASEQ_INORDER_EN |
				  ISCSI_ERL |
				  ISCSI_CONN_PORT |
903
				  ISCSI_CONN_ADDRESS |
904 905 906
				  ISCSI_EXP_STATSN |
				  ISCSI_PERSISTENT_PORT |
				  ISCSI_PERSISTENT_ADDRESS |
907 908
				  ISCSI_TARGET_NAME | ISCSI_TPGT |
				  ISCSI_USERNAME | ISCSI_PASSWORD |
909
				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
910 911
				  ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
				  ISCSI_LU_RESET_TMO |
912 913
				  ISCSI_PING_TMO | ISCSI_RECV_TMO |
				  ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
914
	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
915 916
				  ISCSI_HOST_INITIATOR_NAME |
				  ISCSI_HOST_NETDEV_NAME,
917
	/* session management */
918 919
	.create_session		= iscsi_sw_tcp_session_create,
	.destroy_session	= iscsi_sw_tcp_session_destroy,
920
	/* connection management */
921 922 923 924 925
	.create_conn		= iscsi_sw_tcp_conn_create,
	.bind_conn		= iscsi_sw_tcp_conn_bind,
	.destroy_conn		= iscsi_sw_tcp_conn_destroy,
	.set_param		= iscsi_sw_tcp_conn_set_param,
	.get_conn_param		= iscsi_sw_tcp_conn_get_param,
926
	.get_session_param	= iscsi_session_get_param,
927
	.start_conn		= iscsi_conn_start,
928
	.stop_conn		= iscsi_sw_tcp_conn_stop,
929
	/* iscsi host params */
930
	.get_host_param		= iscsi_host_get_param,
931
	.set_host_param		= iscsi_host_set_param,
932
	/* IO */
933
	.send_pdu		= iscsi_conn_send_pdu,
934
	.get_stats		= iscsi_sw_tcp_conn_get_stats,
935
	/* iscsi task/cmd helpers */
936 937 938
	.init_task		= iscsi_tcp_task_init,
	.xmit_task		= iscsi_tcp_task_xmit,
	.cleanup_task		= iscsi_tcp_cleanup_task,
939
	/* low level pdu helpers */
940 941 942
	.xmit_pdu		= iscsi_sw_tcp_pdu_xmit,
	.init_pdu		= iscsi_sw_tcp_pdu_init,
	.alloc_pdu		= iscsi_sw_tcp_pdu_alloc,
943
	/* recovery */
M
Mike Christie 已提交
944
	.session_recovery_timedout = iscsi_session_recovery_timedout,
945 946
};

947
/*
 * iscsi_sw_tcp_init - module entry point
 *
 * Validates the max_lun parameter and registers the transport.
 * Returns 0 on success, -EINVAL for max_lun < 1, or -ENODEV if the
 * transport registration fails.
 */
static int __init iscsi_sw_tcp_init(void)
{
	if (iscsi_max_lun < 1) {
		printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
		       iscsi_max_lun);
		return -EINVAL;
	}

	iscsi_sw_tcp_scsi_transport =
		iscsi_register_transport(&iscsi_sw_tcp_transport);

	return iscsi_sw_tcp_scsi_transport ? 0 : -ENODEV;
}

963
/*
 * iscsi_sw_tcp_exit - module exit point
 *
 * Unregisters the transport that iscsi_sw_tcp_init() registered.
 */
static void __exit iscsi_sw_tcp_exit(void)
{
	iscsi_unregister_transport(&iscsi_sw_tcp_transport);
}

968 969
/* Register the module's load and unload entry points. */
module_init(iscsi_sw_tcp_init);
module_exit(iscsi_sw_tcp_exit);