iscsi_tcp.c 26.8 KB
Newer Older
1 2 3 4 5
/*
 * iSCSI Initiator over TCP/IP Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
6 7
 * Copyright (C) 2005 - 2006 Mike Christie
 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
 * maintained by open-iscsi@googlegroups.com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * See the file COPYING included with this distribution for more details.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 */

#include <linux/types.h>
#include <linux/inet.h>
31
#include <linux/slab.h>
M
Mike Christie 已提交
32
#include <linux/file.h>
33 34 35 36 37 38 39
#include <linux/blkdev.h>
#include <linux/crypto.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
#include <net/tcp.h>
#include <scsi/scsi_cmnd.h>
40
#include <scsi/scsi_device.h>
41 42 43 44 45 46
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include "iscsi_tcp.h"

47 48
/* Module identification strings. */
MODULE_AUTHOR("Mike Christie <michaelc@cs.wisc.edu>, "
	      "Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");
MODULE_DESCRIPTION("iSCSI/TCP data-path");
MODULE_LICENSE("GPL");

/*
 * File-scope objects that are referenced before their definitions
 * further down in this file.
 */
static struct iscsi_transport iscsi_sw_tcp_transport;
static struct scsi_host_template iscsi_sw_tcp_sht;
static struct scsi_transport_template *iscsi_sw_tcp_scsi_transport;

/*
 * max_lun: copied into shost->max_lun when a session is created and
 * validated (must be >= 1) at module load time.
 */
static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

/* debug_iscsi_tcp: non-zero enables the ISCSI_SW_TCP_DBG() messages. */
static int iscsi_sw_tcp_dbg;
module_param_named(debug_iscsi_tcp, iscsi_sw_tcp_dbg, int,
		   S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(debug_iscsi_tcp, "Turn on debugging for iscsi_tcp module "
		 "Set to 1 to turn on, and zero to turn off. Default is off.");

/*
 * Debug logging helper: when iscsi_sw_tcp_dbg is non-zero, log the message
 * through iscsi_conn_printk() at KERN_INFO, prefixed with the name of the
 * calling function.
 *
 * The expansion intentionally carries no semicolon after "while (0)": the
 * caller supplies it, so the macro behaves as exactly one statement and
 * can be used safely in an unbraced if/else body.
 */
#define ISCSI_SW_TCP_DBG(_conn, dbg_fmt, arg...)		\
	do {							\
		if (iscsi_sw_tcp_dbg)				\
			iscsi_conn_printk(KERN_INFO, _conn,	\
					     "%s " dbg_fmt,	\
					     __func__, ##arg);	\
	} while (0)


O
Olaf Kirch 已提交
75
/**
 * iscsi_sw_tcp_recv - TCP receive in sendfile fashion
 * @rd_desc: read descriptor; arg.data carries the iscsi_conn
 * @skb: socket buffer
 * @offset: offset in skb
 * @len: skb->len - offset
 *
 * Feeds the skb to iscsi_tcp_recv_skb() repeatedly until that helper
 * either consumes nothing or reports ISCSI_TCP_SKB_DONE.
 *
 * Returns the total number of bytes consumed from the skb.
 */
static int iscsi_sw_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
			     unsigned int offset, size_t len)
{
	struct iscsi_conn *conn = rd_desc->arg.data;
	unsigned int total = 0;
	int status;

	ISCSI_SW_TCP_DBG(conn, "in %d bytes\n", skb->len - offset);

	for (;;) {
		unsigned int chunk;

		/* status is reset before every pass over the skb */
		status = 0;
		chunk = iscsi_tcp_recv_skb(conn, skb, offset, 0, &status);
		offset += chunk;
		total += chunk;

		if (chunk == 0 || status == ISCSI_TCP_SKB_DONE)
			break;
	}

	ISCSI_SW_TCP_DBG(conn, "read %d bytes status %d\n",
			 skb->len - offset, status);
	return total;
}

103 104 105 106 107 108 109 110 111 112
/**
 * iscsi_sw_sk_state_check - check socket state
 * @sk: socket
 *
 * If the socket is in CLOSE or CLOSE_WAIT we should
 * not close the connection if there is still some
 * data pending.
 */
static inline int iscsi_sw_sk_state_check(struct sock *sk)
{
113
	struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
114

115 116 117 118 119 120
	if ((sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) &&
	    !atomic_read(&sk->sk_rmem_alloc)) {
		ISCSI_SW_TCP_DBG(conn, "TCP_CLOSE|TCP_CLOSE_WAIT\n");
		iscsi_conn_failure(conn, ISCSI_ERR_TCP_CONN_CLOSE);
		return -ECONNRESET;
	}
121 122 123
	return 0;
}

124
static void iscsi_sw_tcp_data_ready(struct sock *sk, int flag)
125 126
{
	struct iscsi_conn *conn = sk->sk_user_data;
O
Olaf Kirch 已提交
127
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
128 129 130 131
	read_descriptor_t rd_desc;

	read_lock(&sk->sk_callback_lock);

132
	/*
O
Olaf Kirch 已提交
133
	 * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
134
	 * We set count to 1 because we want the network layer to
O
Olaf Kirch 已提交
135
	 * hand us all the skbs that are available. iscsi_tcp_recv
136 137
	 * handled pdus that cross buffers or pdus that still need data.
	 */
138
	rd_desc.arg.data = conn;
139
	rd_desc.count = 1;
140
	tcp_read_sock(sk, &rd_desc, iscsi_sw_tcp_recv);
141

142
	iscsi_sw_sk_state_check(sk);
143

144
	read_unlock(&sk->sk_callback_lock);
O
Olaf Kirch 已提交
145 146 147

	/* If we had to (atomically) map a highmem page,
	 * unmap it now. */
148
	iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
149 150
}

151
static void iscsi_sw_tcp_state_change(struct sock *sk)
152
{
153
	struct iscsi_tcp_conn *tcp_conn;
154
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
155 156 157 158 159 160 161 162 163
	struct iscsi_conn *conn;
	struct iscsi_session *session;
	void (*old_state_change)(struct sock *);

	read_lock(&sk->sk_callback_lock);

	conn = (struct iscsi_conn*)sk->sk_user_data;
	session = conn->session;

164
	iscsi_sw_sk_state_check(sk);
165

166
	tcp_conn = conn->dd_data;
167 168
	tcp_sw_conn = tcp_conn->dd_data;
	old_state_change = tcp_sw_conn->old_state_change;
169 170 171 172 173 174 175 176 177 178

	read_unlock(&sk->sk_callback_lock);

	old_state_change(sk);
}

/**
 * iscsi_write_space - Called when more output buffer space is available
 * @sk: socket space is available for
 **/
179
static void iscsi_sw_tcp_write_space(struct sock *sk)
180 181
{
	struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
182
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
183
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
184

185
	tcp_sw_conn->old_write_space(sk);
186
	ISCSI_SW_TCP_DBG(conn, "iscsi_write_space\n");
187
	iscsi_conn_queue_work(conn);
188 189
}

190
static void iscsi_sw_tcp_conn_set_callbacks(struct iscsi_conn *conn)
191
{
192
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
193 194
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct sock *sk = tcp_sw_conn->sock->sk;
195 196 197 198

	/* assign new callbacks */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data = conn;
199 200 201 202 203 204
	tcp_sw_conn->old_data_ready = sk->sk_data_ready;
	tcp_sw_conn->old_state_change = sk->sk_state_change;
	tcp_sw_conn->old_write_space = sk->sk_write_space;
	sk->sk_data_ready = iscsi_sw_tcp_data_ready;
	sk->sk_state_change = iscsi_sw_tcp_state_change;
	sk->sk_write_space = iscsi_sw_tcp_write_space;
205 206 207
	write_unlock_bh(&sk->sk_callback_lock);
}

208 209
static void
iscsi_sw_tcp_conn_restore_callbacks(struct iscsi_sw_tcp_conn *tcp_sw_conn)
210
{
211
	struct sock *sk = tcp_sw_conn->sock->sk;
212 213 214 215

	/* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_user_data    = NULL;
216 217 218
	sk->sk_data_ready   = tcp_sw_conn->old_data_ready;
	sk->sk_state_change = tcp_sw_conn->old_state_change;
	sk->sk_write_space  = tcp_sw_conn->old_write_space;
219 220 221 222 223
	sk->sk_no_check	 = 0;
	write_unlock_bh(&sk->sk_callback_lock);
}

/**
224
 * iscsi_sw_tcp_xmit_segment - transmit segment
225
 * @tcp_conn: the iSCSI TCP connection
226 227 228 229 230 231 232 233 234 235
 * @segment: the buffer to transmnit
 *
 * This function transmits as much of the buffer as
 * the network layer will accept, and returns the number of
 * bytes transmitted.
 *
 * If CRC hashing is enabled, the function will compute the
 * hash as it goes. When the entire segment has been transmitted,
 * it will retrieve the hash value and send it as well.
 */
236
static int iscsi_sw_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
237 238
				     struct iscsi_segment *segment)
{
239
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
240 241 242 243
	struct socket *sk = tcp_sw_conn->sock;
	unsigned int copied = 0;
	int r = 0;

244
	while (!iscsi_tcp_segment_done(tcp_conn, segment, 0, r)) {
245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
		struct scatterlist *sg;
		unsigned int offset, copy;
		int flags = 0;

		r = 0;
		offset = segment->copied;
		copy = segment->size - offset;

		if (segment->total_copied + segment->size < segment->total_size)
			flags |= MSG_MORE;

		/* Use sendpage if we can; else fall back to sendmsg */
		if (!segment->data) {
			sg = segment->sg;
			offset += segment->sg_offset + sg->offset;
			r = tcp_sw_conn->sendpage(sk, sg_page(sg), offset,
						  copy, flags);
		} else {
			struct msghdr msg = { .msg_flags = flags };
			struct kvec iov = {
				.iov_base = segment->data + offset,
				.iov_len = copy
			};

			r = kernel_sendmsg(sk, &msg, &iov, 1, copy);
		}

		if (r < 0) {
			iscsi_tcp_segment_unmap(segment);
			return r;
		}
		copied += r;
	}
	return copied;
}

/**
 * iscsi_sw_tcp_xmit - TCP transmit
283
 **/
284
static int iscsi_sw_tcp_xmit(struct iscsi_conn *conn)
285
{
286
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
287 288
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct iscsi_segment *segment = &tcp_sw_conn->out.segment;
289 290
	unsigned int consumed = 0;
	int rc = 0;
291

292
	while (1) {
293
		rc = iscsi_sw_tcp_xmit_segment(tcp_conn, segment);
294 295 296 297 298 299 300 301
		/*
		 * We may not have been able to send data because the conn
		 * is getting stopped. libiscsi will know so propogate err
		 * for it to do the right thing.
		 */
		if (rc == -EAGAIN)
			return rc;
		else if (rc < 0) {
302
			rc = ISCSI_ERR_XMIT_FAILED;
303
			goto error;
304
		} else if (rc == 0)
305 306 307 308 309 310 311
			break;

		consumed += rc;

		if (segment->total_copied >= segment->total_size) {
			if (segment->done != NULL) {
				rc = segment->done(tcp_conn, segment);
312
				if (rc != 0)
313 314 315
					goto error;
			}
		}
316 317
	}

318
	ISCSI_SW_TCP_DBG(conn, "xmit %d bytes\n", consumed);
319 320 321 322 323 324 325

	conn->txdata_octets += consumed;
	return consumed;

error:
	/* Transmit error. We could initiate error recovery
	 * here. */
326
	ISCSI_SW_TCP_DBG(conn, "Error sending PDU, errno=%d\n", rc);
327 328
	iscsi_conn_failure(conn, rc);
	return -EIO;
329 330 331
}

/**
332 333
 * iscsi_tcp_xmit_qlen - return the number of bytes queued for xmit
 */
334
static inline int iscsi_sw_tcp_xmit_qlen(struct iscsi_conn *conn)
335
{
336
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
337 338
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct iscsi_segment *segment = &tcp_sw_conn->out.segment;
339

340
	return segment->total_copied - segment->total_size;
341 342
}

343
static int iscsi_sw_tcp_pdu_xmit(struct iscsi_task *task)
344
{
345
	struct iscsi_conn *conn = task->conn;
346 347
	int rc;

348 349
	while (iscsi_sw_tcp_xmit_qlen(conn)) {
		rc = iscsi_sw_tcp_xmit(conn);
350
		if (rc == 0)
351
			return -EAGAIN;
352 353
		if (rc < 0)
			return rc;
354
	}
355

356
	return 0;
357 358
}

359 360 361 362
/*
 * This is called when we're done sending the header.
 * Simply copy the data_segment to the send segment, and return.
 */
363 364
static int iscsi_sw_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn,
				      struct iscsi_segment *segment)
365
{
366 367 368
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;

	tcp_sw_conn->out.segment = tcp_sw_conn->out.data_segment;
369 370 371 372
	ISCSI_SW_TCP_DBG(tcp_conn->iscsi_conn,
			 "Header done. Next segment size %u total_size %u\n",
			 tcp_sw_conn->out.segment.size,
			 tcp_sw_conn->out.segment.total_size);
373 374 375
	return 0;
}

376 377
/*
 * Set up the out segment for sending a PDU header.
 *
 * @conn:   connection the header is sent on
 * @hdr:    header buffer; when header digests are enabled the digest
 *          is written directly behind the header, so the buffer must
 *          have ISCSI_DIGEST_SIZE bytes of room after @hdrlen
 * @hdrlen: length of the header without digest
 */
static void iscsi_sw_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr,
				       size_t hdrlen)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	size_t wire_len = hdrlen;

	ISCSI_SW_TCP_DBG(conn, "%s\n", conn->hdrdgst_en ?
			 "digest enabled" : "digest disabled");

	/*
	 * Start with an empty data segment; the caller fills it in
	 * afterwards through one of the data prep helpers.
	 */
	memset(&tcp_sw_conn->out.data_segment, 0,
	       sizeof(struct iscsi_segment));

	/* With header digest on, the CRC is appended in place. */
	if (conn->hdrdgst_en) {
		iscsi_tcp_dgst_header(&tcp_sw_conn->tx_hash, hdr, hdrlen,
				      hdr + hdrlen);
		wire_len += ISCSI_DIGEST_SIZE;
	}

	/*
	 * Keep the header pointer around: the data prep helpers read
	 * the advertised data length from it.
	 */
	tcp_sw_conn->out.hdr = hdr;

	iscsi_segment_init_linear(&tcp_sw_conn->out.segment, hdr, wire_len,
				  iscsi_sw_tcp_send_hdr_done, NULL);
}

/*
 * Prepare the send buffer for the payload data.
 * Padding and checksumming will all be taken care
 * of by the iscsi_segment routines.
 */
static int
416 417 418
iscsi_sw_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg,
			    unsigned int count, unsigned int offset,
			    unsigned int len)
419 420
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
421
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
422 423 424
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

425 426 427
	ISCSI_SW_TCP_DBG(conn, "offset=%d, datalen=%d %s\n", offset, len,
			 conn->datadgst_en ?
			 "digest enabled" : "digest disabled");
428 429 430

	/* Make sure the datalen matches what the caller
	   said he would send. */
431
	hdr_spec_len = ntoh24(tcp_sw_conn->out.hdr->dlength);
432 433 434
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
435
		tx_hash = &tcp_sw_conn->tx_hash;
436

437 438 439
	return iscsi_segment_seek_sg(&tcp_sw_conn->out.data_segment,
				     sg, count, offset, len,
				     NULL, tx_hash);
440 441 442
}

static void
443
iscsi_sw_tcp_send_linear_data_prep(struct iscsi_conn *conn, void *data,
444 445 446
				   size_t len)
{
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
447
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
448 449 450
	struct hash_desc *tx_hash = NULL;
	unsigned int hdr_spec_len;

451 452
	ISCSI_SW_TCP_DBG(conn, "datalen=%zd %s\n", len, conn->datadgst_en ?
			 "digest enabled" : "digest disabled");
453 454 455

	/* Make sure the datalen matches what the caller
	   said he would send. */
456
	hdr_spec_len = ntoh24(tcp_sw_conn->out.hdr->dlength);
457 458 459
	WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));

	if (conn->datadgst_en)
460
		tx_hash = &tcp_sw_conn->tx_hash;
461

462
	iscsi_segment_init_linear(&tcp_sw_conn->out.data_segment,
463
				data, len, NULL, tx_hash);
464 465
}

466 467
static int iscsi_sw_tcp_pdu_init(struct iscsi_task *task,
				 unsigned int offset, unsigned int count)
468 469 470 471
{
	struct iscsi_conn *conn = task->conn;
	int err = 0;

472
	iscsi_sw_tcp_send_hdr_prep(conn, task->hdr, task->hdr_len);
473 474 475 476 477

	if (!count)
		return 0;

	if (!task->sc)
478
		iscsi_sw_tcp_send_linear_data_prep(conn, task->data, count);
479 480 481
	else {
		struct scsi_data_buffer *sdb = scsi_out(task->sc);

482 483 484
		err = iscsi_sw_tcp_send_data_prep(conn, sdb->table.sgl,
						  sdb->table.nents, offset,
						  count);
485 486 487
	}

	if (err) {
488
		/* got invalid offset/len */
489 490 491 492 493
		return -EIO;
	}
	return 0;
}

494
/*
 * Set up the header buffer of a task. The buffer lives in the task's
 * private area directly behind the struct iscsi_tcp_task; the usable
 * header size excludes the room reserved for the header digest.
 * @opcode is not used. Always returns 0.
 */
static int iscsi_sw_tcp_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
{
	void *hdr_buf = task->dd_data + sizeof(struct iscsi_tcp_task);

	task->hdr = hdr_buf;
	task->hdr_max = sizeof(struct iscsi_sw_tcp_hdrbuf) - ISCSI_DIGEST_SIZE;
	return 0;
}

503
static struct iscsi_cls_conn *
504 505
iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session,
			 uint32_t conn_idx)
506
{
507 508 509
	struct iscsi_conn *conn;
	struct iscsi_cls_conn *cls_conn;
	struct iscsi_tcp_conn *tcp_conn;
510
	struct iscsi_sw_tcp_conn *tcp_sw_conn;
511

512 513
	cls_conn = iscsi_tcp_conn_setup(cls_session, sizeof(*tcp_sw_conn),
					conn_idx);
514 515 516
	if (!cls_conn)
		return NULL;
	conn = cls_conn->dd_data;
517
	tcp_conn = conn->dd_data;
518
	tcp_sw_conn = tcp_conn->dd_data;
519

520 521 522 523
	tcp_sw_conn->tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						     CRYPTO_ALG_ASYNC);
	tcp_sw_conn->tx_hash.flags = 0;
	if (IS_ERR(tcp_sw_conn->tx_hash.tfm))
524
		goto free_conn;
525

526 527 528 529
	tcp_sw_conn->rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
						     CRYPTO_ALG_ASYNC);
	tcp_sw_conn->rx_hash.flags = 0;
	if (IS_ERR(tcp_sw_conn->rx_hash.tfm))
530
		goto free_tx_tfm;
531
	tcp_conn->rx_hash = &tcp_sw_conn->rx_hash;
532

533
	return cls_conn;
534

535
free_tx_tfm:
536
	crypto_free_hash(tcp_sw_conn->tx_hash.tfm);
537
free_conn:
538 539 540 541 542
	iscsi_conn_printk(KERN_ERR, conn,
			  "Could not create connection due to crc32c "
			  "loading error. Make sure the crc32c "
			  "module is built as a module or into the "
			  "kernel\n");
543
	iscsi_tcp_conn_teardown(cls_conn);
544
	return NULL;
545 546
}

547
static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn)
548
{
549
	struct iscsi_session *session = conn->session;
550
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
551 552
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
	struct socket *sock = tcp_sw_conn->sock;
553

554
	if (!sock)
555 556
		return;

557
	sock_hold(sock->sk);
558
	iscsi_sw_tcp_conn_restore_callbacks(tcp_sw_conn);
559
	sock_put(sock->sk);
560

561
	spin_lock_bh(&session->lock);
562
	tcp_sw_conn->sock = NULL;
563 564
	spin_unlock_bh(&session->lock);
	sockfd_put(sock);
565 566
}

567
static void iscsi_sw_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
568
{
569 570
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
571
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
572

573
	iscsi_sw_tcp_release_conn(conn);
574

575 576 577 578
	if (tcp_sw_conn->tx_hash.tfm)
		crypto_free_hash(tcp_sw_conn->tx_hash.tfm);
	if (tcp_sw_conn->rx_hash.tfm)
		crypto_free_hash(tcp_sw_conn->rx_hash.tfm);
579

580
	iscsi_tcp_conn_teardown(cls_conn);
581
}
582

583
static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
584 585
{
	struct iscsi_conn *conn = cls_conn->dd_data;
586
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
587
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
588
	struct socket *sock = tcp_sw_conn->sock;
589 590

	/* userspace may have goofed up and not bound us */
591
	if (!sock)
592 593 594 595 596 597
		return;
	/*
	 * Make sure our recv side is stopped.
	 * Older tools called conn stop before ep_disconnect
	 * so IO could still be coming in.
	 */
598
	write_lock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock);
599
	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
600
	write_unlock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock);
601

602 603 604 605 606
	if (sock->sk->sk_sleep && waitqueue_active(sock->sk->sk_sleep)) {
		sock->sk->sk_err = EIO;
		wake_up_interruptible(sock->sk->sk_sleep);
	}

607
	iscsi_conn_stop(cls_conn, flag);
608
	iscsi_sw_tcp_release_conn(conn);
609 610
}

611 612 613 614 615
static int iscsi_sw_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
				 char *buf, int *port,
				 int (*getname)(struct socket *,
						struct sockaddr *,
						int *addrlen))
616 617 618 619 620 621
{
	struct sockaddr_storage *addr;
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	int rc = 0, len;

622
	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
623 624 625 626 627 628 629 630 631 632 633 634
	if (!addr)
		return -ENOMEM;

	if (getname(sock, (struct sockaddr *) addr, &len)) {
		rc = -ENODEV;
		goto free_addr;
	}

	switch (addr->ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)addr;
		spin_lock_bh(&conn->session->lock);
H
Harvey Harrison 已提交
635
		sprintf(buf, "%pI4", &sin->sin_addr.s_addr);
636 637 638 639 640 641
		*port = be16_to_cpu(sin->sin_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)addr;
		spin_lock_bh(&conn->session->lock);
H
Harvey Harrison 已提交
642
		sprintf(buf, "%pI6", &sin6->sin6_addr);
643 644 645 646 647 648 649 650 651
		*port = be16_to_cpu(sin6->sin6_port);
		spin_unlock_bh(&conn->session->lock);
		break;
	}
free_addr:
	kfree(addr);
	return rc;
}

652
static int
653 654 655
iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
		       struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
		       int is_leading)
656
{
657 658
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
	struct iscsi_host *ihost = shost_priv(shost);
659 660
	struct iscsi_conn *conn = cls_conn->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
661
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
662 663 664
	struct sock *sk;
	struct socket *sock;
	int err;
665

666
	/* lookup for existing socket */
667
	sock = sockfd_lookup((int)transport_eph, &err);
668
	if (!sock) {
669 670
		iscsi_conn_printk(KERN_ERR, conn,
				  "sockfd_lookup failed %d\n", err);
671
		return -EEXIST;
672
	}
673 674 675 676 677
	/*
	 * copy these values now because if we drop the session
	 * userspace may still want to query the values since we will
	 * be using them for the reconnect
	 */
678 679
	err = iscsi_sw_tcp_get_addr(conn, sock, conn->portal_address,
				    &conn->portal_port, kernel_getpeername);
680 681 682
	if (err)
		goto free_socket;

683 684
	err = iscsi_sw_tcp_get_addr(conn, sock, ihost->local_address,
				    &ihost->local_port, kernel_getsockname);
685 686
	if (err)
		goto free_socket;
687

688 689
	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
	if (err)
690
		goto free_socket;
691

692
	/* bind iSCSI connection and socket */
693
	tcp_sw_conn->sock = sock;
694

695 696 697 698 699
	/* setup Socket parameters */
	sk = sock->sk;
	sk->sk_reuse = 1;
	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
	sk->sk_allocation = GFP_ATOMIC;
700

701 702
	iscsi_sw_tcp_conn_set_callbacks(conn);
	tcp_sw_conn->sendpage = tcp_sw_conn->sock->ops->sendpage;
703 704 705
	/*
	 * set receive state machine into initial state
	 */
O
Olaf Kirch 已提交
706
	iscsi_tcp_hdr_recv_prep(tcp_conn);
707
	return 0;
708 709 710 711

free_socket:
	sockfd_put(sock);
	return err;
712 713
}

714 715 716
static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn,
				       enum iscsi_param param, char *buf,
				       int buflen)
717
{
718
	struct iscsi_conn *conn = cls_conn->dd_data;
719
	struct iscsi_session *session = conn->session;
720
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
721
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
722
	int value;
723 724 725

	switch(param) {
	case ISCSI_PARAM_HDRDGST_EN:
726
		iscsi_set_param(cls_conn, param, buf, buflen);
727 728
		break;
	case ISCSI_PARAM_DATADGST_EN:
729
		iscsi_set_param(cls_conn, param, buf, buflen);
730 731
		tcp_sw_conn->sendpage = conn->datadgst_en ?
			sock_no_sendpage : tcp_sw_conn->sock->ops->sendpage;
732 733
		break;
	case ISCSI_PARAM_MAX_R2T:
734
		sscanf(buf, "%d", &value);
735 736 737
		if (value <= 0 || !is_power_of_2(value))
			return -EINVAL;
		if (session->max_r2t == value)
738
			break;
739
		iscsi_tcp_r2tpool_free(session);
740
		iscsi_set_param(cls_conn, param, buf, buflen);
741
		if (iscsi_tcp_r2tpool_alloc(session))
742 743 744
			return -ENOMEM;
		break;
	default:
745
		return iscsi_set_param(cls_conn, param, buf, buflen);
746 747 748 749 750
	}

	return 0;
}

751 752
static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
				       enum iscsi_param param, char *buf)
753
{
754
	struct iscsi_conn *conn = cls_conn->dd_data;
755
	int len;
756 757

	switch(param) {
758
	case ISCSI_PARAM_CONN_PORT:
759 760 761
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%hu\n", conn->portal_port);
		spin_unlock_bh(&conn->session->lock);
762
		break;
763
	case ISCSI_PARAM_CONN_ADDRESS:
764 765 766
		spin_lock_bh(&conn->session->lock);
		len = sprintf(buf, "%s\n", conn->portal_address);
		spin_unlock_bh(&conn->session->lock);
767 768
		break;
	default:
769
		return iscsi_conn_get_param(cls_conn, param, buf);
770 771 772 773 774
	}

	return len;
}

775
static void
776 777
iscsi_sw_tcp_conn_get_stats(struct iscsi_cls_conn *cls_conn,
			    struct iscsi_stats *stats)
778
{
779
	struct iscsi_conn *conn = cls_conn->dd_data;
780
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
781
	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
782 783 784

	stats->custom_length = 3;
	strcpy(stats->custom[0].desc, "tx_sendpage_failures");
785
	stats->custom[0].value = tcp_sw_conn->sendpage_failures_cnt;
786
	strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
787
	stats->custom[1].value = tcp_sw_conn->discontiguous_hdr_cnt;
788 789
	strcpy(stats->custom[2].desc, "eh_abort_cnt");
	stats->custom[2].value = conn->eh_abort_cnt;
790 791

	iscsi_tcp_conn_get_stats(cls_conn, stats);
792 793
}

794
static struct iscsi_cls_session *
795
iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
796
			    uint16_t qdepth, uint32_t initial_cmdsn)
797
{
798 799
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
800
	struct Scsi_Host *shost;
801

802 803
	if (ep) {
		printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);
804 805 806
		return NULL;
	}

807
	shost = iscsi_host_alloc(&iscsi_sw_tcp_sht, 0, 1);
808
	if (!shost)
809
		return NULL;
810
	shost->transportt = iscsi_sw_tcp_scsi_transport;
811
	shost->cmd_per_lun = qdepth;
812 813 814
	shost->max_lun = iscsi_max_lun;
	shost->max_id = 0;
	shost->max_channel = 0;
815
	shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;
816

817
	if (iscsi_host_add(shost, NULL))
818 819
		goto free_host;

820
	cls_session = iscsi_session_setup(&iscsi_sw_tcp_transport, shost,
821
					  cmds_max, 0,
822 823
					  sizeof(struct iscsi_tcp_task) +
					  sizeof(struct iscsi_sw_tcp_hdrbuf),
824
					  initial_cmdsn, 0);
825 826 827
	if (!cls_session)
		goto remove_host;
	session = cls_session->dd_data;
828

829
	shost->can_queue = session->scsi_cmds_max;
830
	if (iscsi_tcp_r2tpool_alloc(session))
831
		goto remove_session;
832 833
	return cls_session;

834
remove_session:
835
	iscsi_session_teardown(cls_session);
836
remove_host:
837
	iscsi_host_remove(shost);
838
free_host:
839
	iscsi_host_free(shost);
840 841 842
	return NULL;
}

843
static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
844
{
845 846
	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);

847
	iscsi_tcp_r2tpool_free(cls_session->dd_data);
848
	iscsi_session_teardown(cls_session);
849

850 851
	iscsi_host_remove(shost);
	iscsi_host_free(shost);
852 853
}

P
Pete Wyckoff 已提交
854 855 856 857 858 859
static int iscsi_sw_tcp_slave_alloc(struct scsi_device *sdev)
{
	set_bit(QUEUE_FLAG_BIDI, &sdev->request_queue->queue_flags);
	return 0;
}

860
static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev)
861
{
862
	blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_ANY);
863 864 865 866
	blk_queue_dma_alignment(sdev->request_queue, 0);
	return 0;
}

867
static struct scsi_host_template iscsi_sw_tcp_sht = {
868
	.module			= THIS_MODULE,
869
	.name			= "iSCSI Initiator over TCP/IP",
870 871
	.queuecommand           = iscsi_queuecommand,
	.change_queue_depth	= iscsi_change_queue_depth,
872
	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
873
	.sg_tablesize		= 4096,
874
	.max_sectors		= 0xFFFF,
875 876
	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
	.eh_abort_handler       = iscsi_eh_abort,
877
	.eh_device_reset_handler= iscsi_eh_device_reset,
878
	.eh_target_reset_handler = iscsi_eh_recover_target,
879
	.use_clustering         = DISABLE_CLUSTERING,
P
Pete Wyckoff 已提交
880
	.slave_alloc            = iscsi_sw_tcp_slave_alloc,
881
	.slave_configure        = iscsi_sw_tcp_slave_configure,
882
	.target_alloc		= iscsi_target_alloc,
883 884 885 886
	.proc_name		= "iscsi_tcp",
	.this_id		= -1,
};

887
static struct iscsi_transport iscsi_sw_tcp_transport = {
888 889 890 891
	.owner			= THIS_MODULE,
	.name			= "tcp",
	.caps			= CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
				  | CAP_DATADGST,
892 893 894 895 896 897 898 899 900 901 902 903 904
	.param_mask		= ISCSI_MAX_RECV_DLENGTH |
				  ISCSI_MAX_XMIT_DLENGTH |
				  ISCSI_HDRDGST_EN |
				  ISCSI_DATADGST_EN |
				  ISCSI_INITIAL_R2T_EN |
				  ISCSI_MAX_R2T |
				  ISCSI_IMM_DATA_EN |
				  ISCSI_FIRST_BURST |
				  ISCSI_MAX_BURST |
				  ISCSI_PDU_INORDER_EN |
				  ISCSI_DATASEQ_INORDER_EN |
				  ISCSI_ERL |
				  ISCSI_CONN_PORT |
905
				  ISCSI_CONN_ADDRESS |
906 907 908
				  ISCSI_EXP_STATSN |
				  ISCSI_PERSISTENT_PORT |
				  ISCSI_PERSISTENT_ADDRESS |
909 910
				  ISCSI_TARGET_NAME | ISCSI_TPGT |
				  ISCSI_USERNAME | ISCSI_PASSWORD |
911
				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
912
				  ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
913
				  ISCSI_LU_RESET_TMO | ISCSI_TGT_RESET_TMO |
914 915
				  ISCSI_PING_TMO | ISCSI_RECV_TMO |
				  ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
916
	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
917 918
				  ISCSI_HOST_INITIATOR_NAME |
				  ISCSI_HOST_NETDEV_NAME,
919
	/* session management */
920 921
	.create_session		= iscsi_sw_tcp_session_create,
	.destroy_session	= iscsi_sw_tcp_session_destroy,
922
	/* connection management */
923 924 925 926 927
	.create_conn		= iscsi_sw_tcp_conn_create,
	.bind_conn		= iscsi_sw_tcp_conn_bind,
	.destroy_conn		= iscsi_sw_tcp_conn_destroy,
	.set_param		= iscsi_sw_tcp_conn_set_param,
	.get_conn_param		= iscsi_sw_tcp_conn_get_param,
928
	.get_session_param	= iscsi_session_get_param,
929
	.start_conn		= iscsi_conn_start,
930
	.stop_conn		= iscsi_sw_tcp_conn_stop,
931
	/* iscsi host params */
932
	.get_host_param		= iscsi_host_get_param,
933
	.set_host_param		= iscsi_host_set_param,
934
	/* IO */
935
	.send_pdu		= iscsi_conn_send_pdu,
936
	.get_stats		= iscsi_sw_tcp_conn_get_stats,
937
	/* iscsi task/cmd helpers */
938 939 940
	.init_task		= iscsi_tcp_task_init,
	.xmit_task		= iscsi_tcp_task_xmit,
	.cleanup_task		= iscsi_tcp_cleanup_task,
941
	/* low level pdu helpers */
942 943 944
	.xmit_pdu		= iscsi_sw_tcp_pdu_xmit,
	.init_pdu		= iscsi_sw_tcp_pdu_init,
	.alloc_pdu		= iscsi_sw_tcp_pdu_alloc,
945
	/* recovery */
M
Mike Christie 已提交
946
	.session_recovery_timedout = iscsi_session_recovery_timedout,
947 948
};

949
/*
 * Module init: validate the max_lun parameter and register the
 * transport.
 *
 * Returns 0 on success, -EINVAL for a max_lun of zero, or -ENODEV if
 * the transport could not be registered.
 */
static int __init iscsi_sw_tcp_init(void)
{
	struct scsi_transport_template *tmpl;

	/* max_lun is unsigned, so "below 1" means exactly zero */
	if (iscsi_max_lun == 0) {
		printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
		       iscsi_max_lun);
		return -EINVAL;
	}

	tmpl = iscsi_register_transport(&iscsi_sw_tcp_transport);
	iscsi_sw_tcp_scsi_transport = tmpl;

	return tmpl ? 0 : -ENODEV;
}

965
/* Module exit: unregister the transport registered at init time. */
static void __exit iscsi_sw_tcp_exit(void)
{
	iscsi_unregister_transport(&iscsi_sw_tcp_transport);
}

/* module entry and exit points */
module_init(iscsi_sw_tcp_init);
module_exit(iscsi_sw_tcp_exit);