/*
 * net/tipc/socket.c: TIPC socket API
3
 *
4
 * Copyright (c) 2001-2007, 2012-2014, Ericsson AB
5
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
P
Per Liden 已提交
6 7
 * All rights reserved.
 *
P
Per Liden 已提交
8
 * Redistribution and use in source and binary forms, with or without
P
Per Liden 已提交
9 10
 * modification, are permitted provided that the following conditions are met:
 *
P
Per Liden 已提交
11 12 13 14 15 16 17 18
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
P
Per Liden 已提交
19
 *
P
Per Liden 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
P
Per Liden 已提交
34 35 36 37
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "core.h"
38
#include "port.h"
39
#include "name_table.h"
E
Erik Hugne 已提交
40
#include "node.h"
41
#include "link.h"
42 43
#include <linux/export.h>

P
Per Liden 已提交
44 45 46
#define SS_LISTENING	-1	/* socket is listening */
#define SS_READY	-2	/* socket is connectionless */

47
#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
48
#define TIPC_FWD_MSG	        1
P
Per Liden 已提交
49

50
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
51
static void tipc_data_ready(struct sock *sk);
52
static void tipc_write_space(struct sock *sk);
53 54
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
55
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p);
56
static void tipc_sk_timeout(unsigned long ref);
P
Per Liden 已提交
57

58 59 60
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
P
Per Liden 已提交
61 62

static struct proto tipc_proto;
63
static struct proto tipc_proto_kern;
P
Per Liden 已提交
64

65
/*
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
 * Revised TIPC socket locking policy:
 *
 * Most socket operations take the standard socket lock when they start
 * and hold it until they finish (or until they need to sleep).  Acquiring
 * this lock grants the owner exclusive access to the fields of the socket
 * data structures, with the exception of the backlog queue.  A few socket
 * operations can be done without taking the socket lock because they only
 * read socket information that never changes during the life of the socket.
 *
 * Socket operations may acquire the lock for the associated TIPC port if they
 * need to perform an operation on the port.  If any routine needs to acquire
 * both the socket lock and the port lock it must take the socket lock first
 * to avoid the risk of deadlock.
 *
 * The dispatcher handling incoming messages cannot grab the socket lock in
 * the standard fashion, since it runs at the BH level and cannot block.
 * Instead, it checks to see if the socket lock is currently owned by someone,
 * and either handles the message itself or adds it to the socket's backlog
 * queue; in the latter case the queued message is processed once the process
 * owning the socket lock releases it.
 *
 * NOTE: Releasing the socket lock while an operation is sleeping overcomes
 * the problem of a blocked socket operation preventing any other operations
 * from occurring.  However, applications must be careful if they have
 * multiple threads trying to send (or receive) on the same socket, as these
 * operations might interfere with each other.  For example, doing a connect
 * and a receive at the same time might allow the receive to consume the
 * ACK message meant for the connect.  While additional work could be done
 * to try and overcome this, it doesn't seem to be worthwhile at the present.
 *
 * NOTE: Releasing the socket lock while an operation is sleeping also ensures
 * that another operation that must be performed in a non-blocking manner is
 * not delayed for very long because the lock has already been taken.
 *
 * NOTE: This code assumes that certain fields of a port/socket pair are
 * constant over its lifetime; such fields can be examined without taking
 * the socket lock and/or port lock, and do not need to be re-read even
 * after resuming processing after waiting.  These fields include:
 *   - socket type
 *   - pointer to socket sk structure (aka tipc_sock structure)
 *   - pointer to port structure
 *   - port reference
 */

110 111
#include "socket.h"

112 113 114 115
/**
 * advance_rx_queue - discard first buffer in socket receive queue
 *
 * Caller must hold socket lock
P
Per Liden 已提交
116
 */
117
static void advance_rx_queue(struct sock *sk)
P
Per Liden 已提交
118
{
119
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
P
Per Liden 已提交
120 121 122
}

/**
123 124 125
 * reject_rx_queue - reject all buffers in socket receive queue
 *
 * Caller must hold socket lock
P
Per Liden 已提交
126
 */
127
static void reject_rx_queue(struct sock *sk)
P
Per Liden 已提交
128
{
129
	struct sk_buff *buf;
130
	u32 dnode;
131

132 133
	while ((buf = __skb_dequeue(&sk->sk_receive_queue))) {
		if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT))
134
			tipc_link_xmit(buf, dnode, 0);
135
	}
P
Per Liden 已提交
136 137 138
}

/**
139
 * tipc_sk_create - create a TIPC socket
140
 * @net: network namespace (must be default network)
P
Per Liden 已提交
141 142
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
143
 * @kern: caused by kernel or by userspace?
144
 *
145 146
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
P
Per Liden 已提交
147 148 149
 *
 * Returns 0 on success, errno otherwise
 */
150 151
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
P
Per Liden 已提交
152
{
153 154
	const struct proto_ops *ops;
	socket_state state;
P
Per Liden 已提交
155
	struct sock *sk;
156 157 158
	struct tipc_sock *tsk;
	struct tipc_port *port;
	u32 ref;
159 160

	/* Validate arguments */
P
Per Liden 已提交
161 162 163 164 165
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	switch (sock->type) {
	case SOCK_STREAM:
166 167
		ops = &stream_ops;
		state = SS_UNCONNECTED;
P
Per Liden 已提交
168 169
		break;
	case SOCK_SEQPACKET:
170 171
		ops = &packet_ops;
		state = SS_UNCONNECTED;
P
Per Liden 已提交
172 173 174
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
175 176
		ops = &msg_ops;
		state = SS_READY;
P
Per Liden 已提交
177
		break;
178 179
	default:
		return -EPROTOTYPE;
P
Per Liden 已提交
180 181
	}

182
	/* Allocate socket's protocol area */
183 184 185 186 187
	if (!kern)
		sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
	else
		sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern);

188
	if (sk == NULL)
P
Per Liden 已提交
189 190
		return -ENOMEM;

191 192 193 194 195 196
	tsk = tipc_sk(sk);
	port = &tsk->port;

	ref = tipc_port_init(port, TIPC_LOW_IMPORTANCE);
	if (!ref) {
		pr_warn("Socket registration failed, ref. table exhausted\n");
197 198 199
		sk_free(sk);
		return -ENOMEM;
	}
P
Per Liden 已提交
200

201 202 203
	/* Finish initializing socket data structures */
	sock->ops = ops;
	sock->state = state;
P
Per Liden 已提交
204

205
	sock_init_data(sock, sk);
206
	k_init_timer(&port->timer, (Handler)tipc_sk_timeout, ref);
207
	sk->sk_backlog_rcv = tipc_backlog_rcv;
208
	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
209 210
	sk->sk_data_ready = tipc_data_ready;
	sk->sk_write_space = tipc_write_space;
211
	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
212
	tsk->sent_unacked = 0;
213
	atomic_set(&tsk->dupl_rcvcnt, 0);
214
	tipc_port_unlock(port);
215

216
	if (sock->state == SS_READY) {
217
		tipc_port_set_unreturnable(port, true);
218
		if (sock->type == SOCK_DGRAM)
219
			tipc_port_set_unreliable(port, true);
220
	}
P
Per Liden 已提交
221 222 223
	return 0;
}

224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
/**
 * tipc_sock_create_local - create TIPC socket from inside TIPC module
 * @type: socket type - SOCK_RDM or SOCK_SEQPACKET
 *
 * We cannot use sock_creat_kern here because it bumps module user count.
 * Since socket owner and creator is the same module we must make sure
 * that module count remains zero for module local sockets, otherwise
 * we cannot do rmmod.
 *
 * Returns 0 on success, errno otherwise
 */
int tipc_sock_create_local(int type, struct socket **res)
{
	int rc;

	rc = sock_create_lite(AF_TIPC, type, 0, res);
	if (rc < 0) {
		pr_err("Failed to create kernel socket\n");
		return rc;
	}
	tipc_sk_create(&init_net, *res, 0, 1);

	return 0;
}

/**
 * tipc_sock_release_local - release socket created by tipc_sock_create_local
 * @sock: the socket to be released.
 *
 * Module reference count is not incremented when such sockets are created,
 * so we must keep it from being decremented when they are released.
 */
void tipc_sock_release_local(struct socket *sock)
{
258
	tipc_release(sock);
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
	sock->ops = NULL;
	sock_release(sock);
}

/**
 * tipc_sock_accept_local - accept a connection on a socket created
 * with tipc_sock_create_local. Use this function to avoid that
 * module reference count is inadvertently incremented.
 *
 * @sock:    the accepting socket
 * @newsock: reference to the new socket to be created
 * @flags:   socket flags
 */

int tipc_sock_accept_local(struct socket *sock, struct socket **newsock,
274
			   int flags)
275 276 277 278 279 280 281 282 283
{
	struct sock *sk = sock->sk;
	int ret;

	ret = sock_create_lite(sk->sk_family, sk->sk_type,
			       sk->sk_protocol, newsock);
	if (ret < 0)
		return ret;

284
	ret = tipc_accept(sock, *newsock, flags);
285 286 287 288 289 290 291 292
	if (ret < 0) {
		sock_release(*newsock);
		return ret;
	}
	(*newsock)->ops = sock->ops;
	return ret;
}

P
Per Liden 已提交
293
/**
294
 * tipc_release - destroy a TIPC socket
P
Per Liden 已提交
295 296 297 298 299 300 301
 * @sock: socket to destroy
 *
 * This routine cleans up any messages that are still queued on the socket.
 * For DGRAM and RDM socket types, all queued messages are rejected.
 * For SEQPACKET and STREAM socket types, the first message is rejected
 * and any others are discarded.  (If the first message on a STREAM socket
 * is partially-read, it is discarded and the next one is rejected instead.)
302
 *
P
Per Liden 已提交
303 304 305 306 307 308
 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 * are returned or discarded according to the "destination droppable" setting
 * specified for the message by the sender.
 *
 * Returns 0 on success, errno otherwise
 */
309
static int tipc_release(struct socket *sock)
P
Per Liden 已提交
310 311
{
	struct sock *sk = sock->sk;
312 313
	struct tipc_sock *tsk;
	struct tipc_port *port;
P
Per Liden 已提交
314
	struct sk_buff *buf;
315
	u32 dnode;
P
Per Liden 已提交
316

317 318 319 320 321
	/*
	 * Exit if socket isn't fully initialized (occurs when a failed accept()
	 * releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
P
Per Liden 已提交
322
		return 0;
323

324 325
	tsk = tipc_sk(sk);
	port = &tsk->port;
326 327 328 329 330 331
	lock_sock(sk);

	/*
	 * Reject all unreceived messages, except on an active connection
	 * (which disconnects locally & sends a 'FIN+' to peer)
	 */
P
Per Liden 已提交
332
	while (sock->state != SS_DISCONNECTING) {
333 334
		buf = __skb_dequeue(&sk->sk_receive_queue);
		if (buf == NULL)
P
Per Liden 已提交
335
			break;
Y
Ying Xue 已提交
336
		if (TIPC_SKB_CB(buf)->handle != NULL)
337
			kfree_skb(buf);
338 339 340 341
		else {
			if ((sock->state == SS_CONNECTING) ||
			    (sock->state == SS_CONNECTED)) {
				sock->state = SS_DISCONNECTING;
342
				tipc_port_disconnect(port->ref);
343
			}
344
			if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT))
345
				tipc_link_xmit(buf, dnode, 0);
346
		}
P
Per Liden 已提交
347 348
	}

349 350
	/* Destroy TIPC port; also disconnects an active connection and
	 * sends a 'FIN-' to peer.
351
	 */
352
	tipc_port_destroy(port);
P
Per Liden 已提交
353

354
	/* Discard any remaining (connection-based) messages in receive queue */
355
	__skb_queue_purge(&sk->sk_receive_queue);
P
Per Liden 已提交
356

357 358 359
	/* Reject any messages that accumulated in backlog queue */
	sock->state = SS_DISCONNECTING;
	release_sock(sk);
P
Per Liden 已提交
360 361

	sock_put(sk);
362
	sock->sk = NULL;
P
Per Liden 已提交
363

364
	return 0;
P
Per Liden 已提交
365 366 367
}

/**
368
 * tipc_bind - associate or disassocate TIPC name(s) with a socket
P
Per Liden 已提交
369 370 371
 * @sock: socket structure
 * @uaddr: socket address describing name(s) and desired operation
 * @uaddr_len: size of socket address data structure
372
 *
P
Per Liden 已提交
373 374 375
 * Name and name sequence binding is indicated using a positive scope value;
 * a negative scope value unbinds the specified name.  Specifying no name
 * (i.e. a socket address length of 0) unbinds all names from the socket.
376
 *
P
Per Liden 已提交
377
 * Returns 0 on success, errno otherwise
378 379 380
 *
 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 *       access any non-constant socket information.
P
Per Liden 已提交
381
 */
382 383
static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
		     int uaddr_len)
P
Per Liden 已提交
384
{
385
	struct sock *sk = sock->sk;
P
Per Liden 已提交
386
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
387
	struct tipc_sock *tsk = tipc_sk(sk);
388
	int res = -EINVAL;
P
Per Liden 已提交
389

390 391
	lock_sock(sk);
	if (unlikely(!uaddr_len)) {
392
		res = tipc_withdraw(&tsk->port, 0, NULL);
393 394
		goto exit;
	}
395

396 397 398 399 400 401 402 403
	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
		res = -EINVAL;
		goto exit;
	}
	if (addr->family != AF_TIPC) {
		res = -EAFNOSUPPORT;
		goto exit;
	}
P
Per Liden 已提交
404 405 406

	if (addr->addrtype == TIPC_ADDR_NAME)
		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
407 408 409 410
	else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
		res = -EAFNOSUPPORT;
		goto exit;
	}
411

412
	if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
413
	    (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
414 415 416 417
	    (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
		res = -EACCES;
		goto exit;
	}
418

419
	res = (addr->scope > 0) ?
420 421
		tipc_publish(&tsk->port, addr->scope, &addr->addr.nameseq) :
		tipc_withdraw(&tsk->port, -addr->scope, &addr->addr.nameseq);
422 423 424
exit:
	release_sock(sk);
	return res;
P
Per Liden 已提交
425 426
}

427
/**
428
 * tipc_getname - get port ID of socket or peer socket
P
Per Liden 已提交
429 430 431
 * @sock: socket structure
 * @uaddr: area for returned socket address
 * @uaddr_len: area for returned length of socket address
432
 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
433
 *
P
Per Liden 已提交
434
 * Returns 0 on success, errno otherwise
435
 *
436 437
 * NOTE: This routine doesn't need to take the socket lock since it only
 *       accesses socket information that is unchanging (or which changes in
438
 *       a completely predictable manner).
P
Per Liden 已提交
439
 */
440 441
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
			int *uaddr_len, int peer)
P
Per Liden 已提交
442 443
{
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
444
	struct tipc_sock *tsk = tipc_sk(sock->sk);
P
Per Liden 已提交
445

446
	memset(addr, 0, sizeof(*addr));
447
	if (peer) {
448 449 450
		if ((sock->state != SS_CONNECTED) &&
			((peer != 2) || (sock->state != SS_DISCONNECTING)))
			return -ENOTCONN;
451 452
		addr->addr.id.ref = tipc_port_peerport(&tsk->port);
		addr->addr.id.node = tipc_port_peernode(&tsk->port);
453
	} else {
454
		addr->addr.id.ref = tsk->port.ref;
A
Allan Stephens 已提交
455
		addr->addr.id.node = tipc_own_addr;
456
	}
P
Per Liden 已提交
457 458 459 460 461 462 463

	*uaddr_len = sizeof(*addr);
	addr->addrtype = TIPC_ADDR_ID;
	addr->family = AF_TIPC;
	addr->scope = 0;
	addr->addr.name.domain = 0;

464
	return 0;
P
Per Liden 已提交
465 466 467
}

/**
468
 * tipc_poll - read and possibly block on pollmask
P
Per Liden 已提交
469 470 471 472
 * @file: file structure associated with the socket
 * @sock: socket for which to calculate the poll bits
 * @wait: ???
 *
473 474 475 476 477 478 479 480 481
 * Returns pollmask value
 *
 * COMMENTARY:
 * It appears that the usual socket locking mechanisms are not useful here
 * since the pollmask info is potentially out-of-date the moment this routine
 * exits.  TCP and other protocols seem to rely on higher level poll routines
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
 * TIPC sets the returned events as follows:
482 483 484 485
 *
 * socket state		flags set
 * ------------		---------
 * unconnected		no read flags
486
 *			POLLOUT if port is not congested
487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
 *
 * connecting		POLLIN/POLLRDNORM if ACK/NACK in rx queue
 *			no write flags
 *
 * connected		POLLIN/POLLRDNORM if data in rx queue
 *			POLLOUT if port is not congested
 *
 * disconnecting	POLLIN/POLLRDNORM/POLLHUP
 *			no write flags
 *
 * listening		POLLIN if SYN in rx queue
 *			no write flags
 *
 * ready		POLLIN/POLLRDNORM if data in rx queue
 * [connectionless]	POLLOUT (since port cannot be congested)
 *
 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 * imply that the operation will succeed, merely that it should be performed
 * and will not block.
P
Per Liden 已提交
506
 */
507 508
static unsigned int tipc_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
P
Per Liden 已提交
509
{
510
	struct sock *sk = sock->sk;
511
	struct tipc_sock *tsk = tipc_sk(sk);
512
	u32 mask = 0;
513

514
	sock_poll_wait(file, sk_sleep(sk), wait);
515

516
	switch ((int)sock->state) {
517
	case SS_UNCONNECTED:
518
		if (!tsk->link_cong)
519 520
			mask |= POLLOUT;
		break;
521 522
	case SS_READY:
	case SS_CONNECTED:
523
		if (!tsk->link_cong && !tipc_sk_conn_cong(tsk))
524 525 526 527 528 529 530 531 532 533 534
			mask |= POLLOUT;
		/* fall thru' */
	case SS_CONNECTING:
	case SS_LISTENING:
		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= (POLLIN | POLLRDNORM);
		break;
	case SS_DISCONNECTING:
		mask = (POLLIN | POLLRDNORM | POLLHUP);
		break;
	}
535 536

	return mask;
P
Per Liden 已提交
537 538
}

539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
/**
 * tipc_sendmcast - send multicast message
 * @sock: socket structure
 * @seq: destination address
 * @iov: message data to send
 * @dsz: total length of message data
 * @timeo: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks.
 * The message is rebuilt with a freshly read MTU when transmission reports
 * -EMSGSIZE, and transmission is retried after waiting when it reports
 * -ELINKCONG.
 *
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
			  struct iovec *iov, size_t dsz, long timeo)
{
	struct sock *sk = sock->sk;
	struct tipc_msg *hdr = &tipc_sk(sk)->port.phdr;
	struct sk_buff *skb;
	uint mtu;
	int rc;

	/* Prepare the port's message header for a multicast send */
	msg_set_type(hdr, TIPC_MCAST_MSG);
	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_nametype(hdr, seq->type);
	msg_set_namelower(hdr, seq->lower);
	msg_set_nameupper(hdr, seq->upper);
	msg_set_hdr_sz(hdr, MCAST_H_SIZE);

new_mtu:
	mtu = tipc_bclink_get_mtu();
	rc = tipc_msg_build(hdr, iov, 0, dsz, mtu, &skb);
	if (unlikely(rc < 0))
		return rc;

	for (;;) {
		rc = tipc_bclink_xmit(skb);
		if (likely(rc >= 0))
			return dsz;
		if (rc == -EMSGSIZE)
			goto new_mtu;
		if (rc != -ELINKCONG)
			return rc;

		/* Link congested: flag it and wait before trying again */
		tipc_sk(sk)->link_cong = 1;
		rc = tipc_wait_for_sndmsg(sock, &timeo);
		if (rc) {
			kfree_skb_list(skb);
			return rc;
		}
	}
}

592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631
/* tipc_sk_mcast_rcv - Deliver multicast message to all destination sockets
 * @buf: buffer holding the received multicast message
 *
 * Translates the name sequence carried in the message header into a list
 * of destination ports and passes one buffer per port to tipc_sk_rcv().
 * Every destination but the last gets a clone of @buf; the last one gets
 * @buf itself.  If no destination is found, @buf is freed.
 */
void tipc_sk_mcast_rcv(struct sk_buff *buf)
{
	struct tipc_msg *msg = buf_msg(buf);
	struct tipc_port_list dports = {0, NULL, };
	struct tipc_port_list *item;
	struct sk_buff *b;
	uint i, last, dst = 0;
	u32 scope = TIPC_CLUSTER_SCOPE;

	/* Use node scope for the lookup when the message originates locally */
	if (in_own_node(msg_orignode(msg)))
		scope = TIPC_NODE_SCOPE;

	/* Create destination port list: */
	tipc_nametbl_mc_translate(msg_nametype(msg),
				  msg_namelower(msg),
				  msg_nameupper(msg),
				  scope,
				  &dports);
	last = dports.count;
	if (!last) {
		kfree_skb(buf);
		return;
	}

	/* dst counts destinations across all list items (1-based) */
	for (item = &dports; item; item = item->next) {
		for (i = 0; i < PLSIZE && ++dst <= last; i++) {
			/* Only the final destination consumes the original */
			b = (dst != last) ? skb_clone(buf, GFP_ATOMIC) : buf;
			if (!b) {
				/* Clone failed: this destination is skipped */
				pr_warn("Failed do clone mcast rcv buffer\n");
				continue;
			}
			/* Header is written through msg, i.e. in buf's data */
			msg_set_destport(msg, item->ports[i]);
			tipc_sk_rcv(b);
		}
	}
	tipc_port_list_free(&dports);
}

632 633 634 635 636 637 638 639
/**
 * tipc_sk_proto_rcv - receive a connection mng protocol message
 * @tsk: receiving socket
 * @dnode: node to send response message to, if any
 * @buf: buffer containing protocol message
 * Returns 0 (TIPC_OK) if message was consumed, 1 (TIPC_FWD_MSG) if
 * (CONN_PROBE_REPLY) message should be forwarded.
 */
640 641
static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode,
			     struct sk_buff *buf)
642 643 644
{
	struct tipc_msg *msg = buf_msg(buf);
	struct tipc_port *port = &tsk->port;
645
	int conn_cong;
646 647 648 649 650 651 652 653

	/* Ignore if connection cannot be validated: */
	if (!port->connected || !tipc_port_peer_msg(port, msg))
		goto exit;

	port->probing_state = TIPC_CONN_OK;

	if (msg_type(msg) == CONN_ACK) {
654 655 656
		conn_cong = tipc_sk_conn_cong(tsk);
		tsk->sent_unacked -= msg_msgcnt(msg);
		if (conn_cong)
657
			tsk->sk.sk_write_space(&tsk->sk);
658 659 660 661 662 663 664 665 666 667 668 669
	} else if (msg_type(msg) == CONN_PROBE) {
		if (!tipc_msg_reverse(buf, dnode, TIPC_OK))
			return TIPC_OK;
		msg_set_type(msg, CONN_PROBE_REPLY);
		return TIPC_FWD_MSG;
	}
	/* Do nothing if msg_type() == CONN_PROBE_REPLY */
exit:
	kfree_skb(buf);
	return TIPC_OK;
}

670
/**
P
Per Liden 已提交
671 672 673
 * dest_name_check - verify user is permitted to send to specified port name
 * @dest: destination address
 * @m: descriptor for message to be sent
674
 *
P
Per Liden 已提交
675 676
 * Prevents restricted configuration commands from being issued by
 * unauthorized users.
677
 *
P
Per Liden 已提交
678 679
 * Returns 0 if permission is granted, otherwise errno
 */
S
Sam Ravnborg 已提交
680
static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
P
Per Liden 已提交
681 682 683
{
	struct tipc_cfg_msg_hdr hdr;

684 685
	if (unlikely(dest->addrtype == TIPC_ADDR_ID))
		return 0;
686 687 688 689 690 691
	if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES))
		return 0;
	if (likely(dest->addr.name.name.type == TIPC_TOP_SRV))
		return 0;
	if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
		return -EACCES;
P
Per Liden 已提交
692

693 694
	if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr)))
		return -EMSGSIZE;
695
	if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
P
Per Liden 已提交
696
		return -EFAULT;
697
	if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
P
Per Liden 已提交
698
		return -EACCES;
699

P
Per Liden 已提交
700 701 702
	return 0;
}

703 704 705
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
{
	struct sock *sk = sock->sk;
706
	struct tipc_sock *tsk = tipc_sk(sk);
707 708 709 710 711 712 713 714 715 716 717 718 719 720 721
	DEFINE_WAIT(wait);
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (sock->state == SS_DISCONNECTING)
			return -EPIPE;
		if (!*timeo_p)
			return -EAGAIN;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
722
		done = sk_wait_event(sk, timeo_p, !tsk->link_cong);
723 724 725 726 727
		finish_wait(sk_sleep(sk), &wait);
	} while (!done);
	return 0;
}

P
Per Liden 已提交
728
/**
729
 * tipc_sendmsg - send message in connectionless manner
730
 * @iocb: if NULL, indicates that socket lock is already held
P
Per Liden 已提交
731 732
 * @sock: socket structure
 * @m: message to send
733
 * @dsz: amount of user data to be sent
734
 *
P
Per Liden 已提交
735
 * Message must have an destination specified explicitly.
736
 * Used for SOCK_RDM and SOCK_DGRAM messages,
P
Per Liden 已提交
737 738
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
739
 *
P
Per Liden 已提交
740 741
 * Returns the number of bytes sent on success, or errno otherwise
 */
742
static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
743
			struct msghdr *m, size_t dsz)
P
Per Liden 已提交
744
{
745
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
746
	struct sock *sk = sock->sk;
747
	struct tipc_sock *tsk = tipc_sk(sk);
748
	struct tipc_port *port = &tsk->port;
749 750 751 752 753 754
	struct tipc_msg *mhdr = &port->phdr;
	struct iovec *iov = m->msg_iov;
	u32 dnode, dport;
	struct sk_buff *buf;
	struct tipc_name_seq *seq = &dest->addr.nameseq;
	u32 mtu;
755
	long timeo;
756
	int rc = -EINVAL;
P
Per Liden 已提交
757 758 759

	if (unlikely(!dest))
		return -EDESTADDRREQ;
760

761 762
	if (unlikely((m->msg_namelen < sizeof(*dest)) ||
		     (dest->family != AF_TIPC)))
P
Per Liden 已提交
763
		return -EINVAL;
764 765

	if (dsz > TIPC_MAX_USER_MSG_SIZE)
766
		return -EMSGSIZE;
P
Per Liden 已提交
767

768 769 770
	if (iocb)
		lock_sock(sk);

771
	if (unlikely(sock->state != SS_READY)) {
772
		if (sock->state == SS_LISTENING) {
773
			rc = -EPIPE;
774 775 776
			goto exit;
		}
		if (sock->state != SS_UNCONNECTED) {
777
			rc = -EISCONN;
778 779
			goto exit;
		}
780
		if (tsk->port.published) {
781
			rc = -EOPNOTSUPP;
782 783
			goto exit;
		}
784
		if (dest->addrtype == TIPC_ADDR_NAME) {
785 786
			tsk->port.conn_type = dest->addr.name.name.type;
			tsk->port.conn_instance = dest->addr.name.name.instance;
787
		}
P
Per Liden 已提交
788
	}
789 790 791
	rc = dest_name_check(dest, m);
	if (rc)
		goto exit;
P
Per Liden 已提交
792

793
	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814

	if (dest->addrtype == TIPC_ADDR_MCAST) {
		rc = tipc_sendmcast(sock, seq, iov, dsz, timeo);
		goto exit;
	} else if (dest->addrtype == TIPC_ADDR_NAME) {
		u32 type = dest->addr.name.name.type;
		u32 inst = dest->addr.name.name.instance;
		u32 domain = dest->addr.name.domain;

		dnode = domain;
		msg_set_type(mhdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(mhdr, NAMED_H_SIZE);
		msg_set_nametype(mhdr, type);
		msg_set_nameinst(mhdr, inst);
		msg_set_lookup_scope(mhdr, tipc_addr_scope(domain));
		dport = tipc_nametbl_translate(type, inst, &dnode);
		msg_set_destnode(mhdr, dnode);
		msg_set_destport(mhdr, dport);
		if (unlikely(!dport && !dnode)) {
			rc = -EHOSTUNREACH;
			goto exit;
815
		}
816 817 818 819 820 821 822 823 824 825 826
	} else if (dest->addrtype == TIPC_ADDR_ID) {
		dnode = dest->addr.id.node;
		msg_set_type(mhdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(mhdr, 0);
		msg_set_destnode(mhdr, dnode);
		msg_set_destport(mhdr, dest->addr.id.ref);
		msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
	}

new_mtu:
	mtu = tipc_node_get_mtu(dnode, tsk->port.ref);
827
	rc = tipc_msg_build(mhdr, iov, 0, dsz, mtu, &buf);
828 829 830 831
	if (rc < 0)
		goto exit;

	do {
832
		TIPC_SKB_CB(buf)->wakeup_pending = tsk->link_cong;
833
		rc = tipc_link_xmit(buf, dnode, tsk->port.ref);
834 835
		if (likely(rc >= 0)) {
			if (sock->state != SS_READY)
836
				sock->state = SS_CONNECTING;
837
			rc = dsz;
838
			break;
839
		}
840 841 842
		if (rc == -EMSGSIZE)
			goto new_mtu;
		if (rc != -ELINKCONG)
843
			break;
844
		tsk->link_cong = 1;
845
		rc = tipc_wait_for_sndmsg(sock, &timeo);
846 847
		if (rc)
			kfree_skb_list(buf);
848
	} while (!rc);
849 850 851
exit:
	if (iocb)
		release_sock(sk);
852 853

	return rc;
P
Per Liden 已提交
854 855
}

856 857 858
static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
{
	struct sock *sk = sock->sk;
859
	struct tipc_sock *tsk = tipc_sk(sk);
860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877
	DEFINE_WAIT(wait);
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (sock->state == SS_DISCONNECTING)
			return -EPIPE;
		else if (sock->state != SS_CONNECTED)
			return -ENOTCONN;
		if (!*timeo_p)
			return -EAGAIN;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		done = sk_wait_event(sk, timeo_p,
878 879 880
				     (!tsk->link_cong &&
				      !tipc_sk_conn_cong(tsk)) ||
				     !tsk->port.connected);
881 882 883 884 885
		finish_wait(sk_sleep(sk), &wait);
	} while (!done);
	return 0;
}

886
/**
887 888
 * tipc_send_stream - send stream-oriented data
 * @iocb: (unused)
P
Per Liden 已提交
889
 * @sock: socket structure
890 891
 * @m: data to send
 * @dsz: total length of data to be transmitted
892
 *
893
 * Used for SOCK_STREAM data.
894
 *
895 896
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
P
Per Liden 已提交
897
 */
898 899
static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
			    struct msghdr *m, size_t dsz)
P
Per Liden 已提交
900
{
901
	struct sock *sk = sock->sk;
902
	struct tipc_sock *tsk = tipc_sk(sk);
903 904 905
	struct tipc_port *port = &tsk->port;
	struct tipc_msg *mhdr = &port->phdr;
	struct sk_buff *buf;
906
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
907 908
	u32 ref = port->ref;
	int rc = -EINVAL;
909
	long timeo;
910 911
	u32 dnode;
	uint mtu, send, sent = 0;
P
Per Liden 已提交
912 913

	/* Handle implied connection establishment */
914 915 916
	if (unlikely(dest)) {
		rc = tipc_sendmsg(iocb, sock, m, dsz);
		if (dsz && (dsz == rc))
917
			tsk->sent_unacked = 1;
918 919 920
		return rc;
	}
	if (dsz > (uint)INT_MAX)
921 922
		return -EMSGSIZE;

923 924
	if (iocb)
		lock_sock(sk);
P
Per Liden 已提交
925

926 927
	if (unlikely(sock->state != SS_CONNECTED)) {
		if (sock->state == SS_DISCONNECTING)
928
			rc = -EPIPE;
929
		else
930
			rc = -ENOTCONN;
931 932
		goto exit;
	}
933

934
	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
935 936 937 938 939
	dnode = tipc_port_peernode(port);

next:
	mtu = port->max_pkt;
	send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
940
	rc = tipc_msg_build(mhdr, m->msg_iov, sent, send, mtu, &buf);
941 942
	if (unlikely(rc < 0))
		goto exit;
943
	do {
944
		if (likely(!tipc_sk_conn_cong(tsk))) {
945
			rc = tipc_link_xmit(buf, dnode, ref);
946
			if (likely(!rc)) {
947
				tsk->sent_unacked++;
948 949 950 951 952 953 954 955 956 957 958
				sent += send;
				if (sent == dsz)
					break;
				goto next;
			}
			if (rc == -EMSGSIZE) {
				port->max_pkt = tipc_node_get_mtu(dnode, ref);
				goto next;
			}
			if (rc != -ELINKCONG)
				break;
959
			tsk->link_cong = 1;
960 961
		}
		rc = tipc_wait_for_sndpkt(sock, &timeo);
962 963
		if (rc)
			kfree_skb_list(buf);
964
	} while (!rc);
965
exit:
966 967
	if (iocb)
		release_sock(sk);
968
	return sent ? sent : rc;
P
Per Liden 已提交
969 970
}

971
/**
972 973
 * tipc_send_packet - send a connection-oriented message
 * @iocb: if NULL, indicates that socket lock is already held
P
Per Liden 已提交
974
 * @sock: socket structure
975 976
 * @m: message to send
 * @dsz: length of data to be transmitted
977
 *
978
 * Used for SOCK_SEQPACKET messages.
979
 *
980
 * Returns the number of bytes sent on success, or errno otherwise
P
Per Liden 已提交
981
 */
982 983
static int tipc_send_packet(struct kiocb *iocb, struct socket *sock,
			    struct msghdr *m, size_t dsz)
P
Per Liden 已提交
984
{
985 986
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;
P
Per Liden 已提交
987

988
	return tipc_send_stream(iocb, sock, m, dsz);
P
Per Liden 已提交
989 990 991 992
}

/**
 * auto_connect - complete connection setup to a remote port
993
 * @tsk: tipc socket structure
P
Per Liden 已提交
994
 * @msg: peer's response message
995
 *
P
Per Liden 已提交
996 997
 * Returns 0 on success, errno otherwise
 */
998
static int auto_connect(struct tipc_sock *tsk, struct tipc_msg *msg)
P
Per Liden 已提交
999
{
1000 1001
	struct tipc_port *port = &tsk->port;
	struct socket *sock = tsk->sk.sk_socket;
1002 1003 1004 1005
	struct tipc_portid peer;

	peer.ref = msg_origport(msg);
	peer.node = msg_orignode(msg);
P
Per Liden 已提交
1006

1007
	__tipc_port_connect(port->ref, port, &peer);
1008 1009 1010

	if (msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)
		return -EINVAL;
1011
	msg_set_importance(&port->phdr, (u32)msg_importance(msg));
P
Per Liden 已提交
1012 1013 1014 1015 1016 1017 1018 1019
	sock->state = SS_CONNECTED;
	return 0;
}

/**
 * set_orig_addr - capture sender's address for received message
 * @m: descriptor for message info
 * @msg: received message header
1020
 *
P
Per Liden 已提交
1021 1022
 * Note: Address is not captured if not requested by receiver.
 */
S
Sam Ravnborg 已提交
1023
static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
P
Per Liden 已提交
1024
{
1025
	DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);
P
Per Liden 已提交
1026

1027
	if (addr) {
P
Per Liden 已提交
1028 1029
		addr->family = AF_TIPC;
		addr->addrtype = TIPC_ADDR_ID;
1030
		memset(&addr->addr, 0, sizeof(addr->addr));
P
Per Liden 已提交
1031 1032
		addr->addr.id.ref = msg_origport(msg);
		addr->addr.id.node = msg_orignode(msg);
1033 1034
		addr->addr.name.domain = 0;	/* could leave uninitialized */
		addr->scope = 0;		/* could leave uninitialized */
P
Per Liden 已提交
1035 1036 1037 1038 1039
		m->msg_namelen = sizeof(struct sockaddr_tipc);
	}
}

/**
1040
 * anc_data_recv - optionally capture ancillary data for received message
P
Per Liden 已提交
1041 1042 1043
 * @m: descriptor for message info
 * @msg: received message header
 * @tport: TIPC port associated with message
1044
 *
P
Per Liden 已提交
1045
 * Note: Ancillary data is not captured if not requested by receiver.
1046
 *
P
Per Liden 已提交
1047 1048
 * Returns 0 if successful, otherwise errno
 */
S
Sam Ravnborg 已提交
1049
static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
1050
			 struct tipc_port *tport)
P
Per Liden 已提交
1051 1052 1053 1054
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
1055
	int has_name;
P
Per Liden 已提交
1056 1057 1058 1059 1060 1061 1062 1063 1064 1065
	int res;

	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
1066 1067
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
P
Per Liden 已提交
1068
			return res;
1069 1070 1071 1072 1073 1074
		if (anc_data[1]) {
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
P
Per Liden 已提交
1075 1076 1077 1078 1079 1080
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
1081
		has_name = 1;
P
Per Liden 已提交
1082 1083 1084 1085 1086
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
1087
		has_name = 1;
P
Per Liden 已提交
1088 1089 1090 1091 1092
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
1093
		has_name = (tport->conn_type != 0);
P
Per Liden 已提交
1094 1095 1096 1097 1098
		anc_data[0] = tport->conn_type;
		anc_data[1] = tport->conn_instance;
		anc_data[2] = tport->conn_instance;
		break;
	default:
1099
		has_name = 0;
P
Per Liden 已提交
1100
	}
1101 1102 1103 1104 1105
	if (has_name) {
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}
P
Per Liden 已提交
1106 1107 1108 1109

	return 0;
}

1110
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
Y
Ying Xue 已提交
1111 1112 1113
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
1114
	long timeo = *timeop;
Y
Ying Xue 已提交
1115 1116 1117 1118
	int err;

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1119
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
Y
Ying Xue 已提交
1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138
			if (sock->state == SS_DISCONNECTING) {
				err = -ENOTCONN;
				break;
			}
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
1139
	*timeop = timeo;
Y
Ying Xue 已提交
1140 1141 1142
	return err;
}

1143
/**
1144
 * tipc_recvmsg - receive packet-oriented message
P
Per Liden 已提交
1145 1146 1147 1148
 * @iocb: (unused)
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
1149
 *
P
Per Liden 已提交
1150 1151 1152 1153 1154
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
1155 1156
static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *m, size_t buf_len, int flags)
P
Per Liden 已提交
1157
{
1158
	struct sock *sk = sock->sk;
1159 1160
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_port *port = &tsk->port;
P
Per Liden 已提交
1161 1162
	struct sk_buff *buf;
	struct tipc_msg *msg;
Y
Ying Xue 已提交
1163
	long timeo;
P
Per Liden 已提交
1164 1165 1166 1167
	unsigned int sz;
	u32 err;
	int res;

1168
	/* Catch invalid receive requests */
P
Per Liden 已提交
1169 1170 1171
	if (unlikely(!buf_len))
		return -EINVAL;

1172
	lock_sock(sk);
P
Per Liden 已提交
1173

1174 1175
	if (unlikely(sock->state == SS_UNCONNECTED)) {
		res = -ENOTCONN;
P
Per Liden 已提交
1176 1177 1178
		goto exit;
	}

Y
Ying Xue 已提交
1179
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1180
restart:
P
Per Liden 已提交
1181

1182
	/* Look for a message in receive queue; wait if necessary */
1183
	res = tipc_wait_for_rcvmsg(sock, &timeo);
Y
Ying Xue 已提交
1184 1185
	if (res)
		goto exit;
P
Per Liden 已提交
1186

1187 1188
	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
P
Per Liden 已提交
1189 1190 1191 1192 1193 1194
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
1195
		advance_rx_queue(sk);
P
Per Liden 已提交
1196 1197 1198 1199 1200 1201 1202
		goto restart;
	}

	/* Capture sender's address (optional) */
	set_orig_addr(m, msg);

	/* Capture ancillary data (optional) */
1203
	res = anc_data_recv(m, msg, port);
1204
	if (res)
P
Per Liden 已提交
1205 1206 1207 1208 1209 1210 1211 1212
		goto exit;

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
		if (unlikely(buf_len < sz)) {
			sz = buf_len;
			m->msg_flags |= MSG_TRUNC;
		}
1213 1214 1215
		res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg),
					      m->msg_iov, sz);
		if (res)
P
Per Liden 已提交
1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227
			goto exit;
		res = sz;
	} else {
		if ((sock->state == SS_READY) ||
		    ((err == TIPC_CONN_SHUTDOWN) || m->msg_control))
			res = 0;
		else
			res = -ECONNRESET;
	}

	/* Consume received message (optional) */
	if (likely(!(flags & MSG_PEEK))) {
1228
		if ((sock->state != SS_READY) &&
1229 1230 1231 1232
		    (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) {
			tipc_acknowledge(port->ref, tsk->rcv_unacked);
			tsk->rcv_unacked = 0;
		}
1233
		advance_rx_queue(sk);
1234
	}
P
Per Liden 已提交
1235
exit:
1236
	release_sock(sk);
P
Per Liden 已提交
1237 1238 1239
	return res;
}

1240
/**
1241
 * tipc_recv_stream - receive stream-oriented data
P
Per Liden 已提交
1242 1243 1244 1245
 * @iocb: (unused)
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
1246 1247
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
P
Per Liden 已提交
1248 1249 1250 1251
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
1252 1253
static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock,
			    struct msghdr *m, size_t buf_len, int flags)
P
Per Liden 已提交
1254
{
1255
	struct sock *sk = sock->sk;
1256 1257
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_port *port = &tsk->port;
P
Per Liden 已提交
1258 1259
	struct sk_buff *buf;
	struct tipc_msg *msg;
Y
Ying Xue 已提交
1260
	long timeo;
P
Per Liden 已提交
1261
	unsigned int sz;
1262
	int sz_to_copy, target, needed;
P
Per Liden 已提交
1263 1264
	int sz_copied = 0;
	u32 err;
1265
	int res = 0;
P
Per Liden 已提交
1266

1267
	/* Catch invalid receive attempts */
P
Per Liden 已提交
1268 1269 1270
	if (unlikely(!buf_len))
		return -EINVAL;

1271
	lock_sock(sk);
P
Per Liden 已提交
1272

Y
Ying Xue 已提交
1273
	if (unlikely(sock->state == SS_UNCONNECTED)) {
1274
		res = -ENOTCONN;
P
Per Liden 已提交
1275 1276 1277
		goto exit;
	}

1278
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
Y
Ying Xue 已提交
1279
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
P
Per Liden 已提交
1280

1281
restart:
1282
	/* Look for a message in receive queue; wait if necessary */
1283
	res = tipc_wait_for_rcvmsg(sock, &timeo);
Y
Ying Xue 已提交
1284 1285
	if (res)
		goto exit;
P
Per Liden 已提交
1286

1287 1288
	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
P
Per Liden 已提交
1289 1290 1291 1292 1293 1294
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
1295
		advance_rx_queue(sk);
P
Per Liden 已提交
1296 1297 1298 1299 1300 1301
		goto restart;
	}

	/* Optionally capture sender's address & ancillary data of first msg */
	if (sz_copied == 0) {
		set_orig_addr(m, msg);
1302
		res = anc_data_recv(m, msg, port);
1303
		if (res)
P
Per Liden 已提交
1304 1305 1306 1307 1308
			goto exit;
	}

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
1309
		u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle);
P
Per Liden 已提交
1310

1311
		sz -= offset;
P
Per Liden 已提交
1312 1313
		needed = (buf_len - sz_copied);
		sz_to_copy = (sz <= needed) ? sz : needed;
1314 1315 1316 1317

		res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset,
					      m->msg_iov, sz_to_copy);
		if (res)
P
Per Liden 已提交
1318
			goto exit;
1319

P
Per Liden 已提交
1320 1321 1322 1323
		sz_copied += sz_to_copy;

		if (sz_to_copy < sz) {
			if (!(flags & MSG_PEEK))
1324 1325
				TIPC_SKB_CB(buf)->handle =
				(void *)(unsigned long)(offset + sz_to_copy);
P
Per Liden 已提交
1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339
			goto exit;
		}
	} else {
		if (sz_copied != 0)
			goto exit; /* can't add error msg to valid data */

		if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
			res = 0;
		else
			res = -ECONNRESET;
	}

	/* Consume received message (optional) */
	if (likely(!(flags & MSG_PEEK))) {
1340 1341 1342 1343
		if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) {
			tipc_acknowledge(port->ref, tsk->rcv_unacked);
			tsk->rcv_unacked = 0;
		}
1344
		advance_rx_queue(sk);
1345
	}
P
Per Liden 已提交
1346 1347

	/* Loop around if more data is required */
1348 1349
	if ((sz_copied < buf_len) &&	/* didn't get all requested data */
	    (!skb_queue_empty(&sk->sk_receive_queue) ||
1350
	    (sz_copied < target)) &&	/* and more is ready or required */
1351 1352
	    (!(flags & MSG_PEEK)) &&	/* and aren't just peeking at data */
	    (!err))			/* and haven't reached a FIN */
P
Per Liden 已提交
1353 1354 1355
		goto restart;

exit:
1356
	release_sock(sk);
1357
	return sz_copied ? sz_copied : res;
P
Per Liden 已提交
1358 1359
}

1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380
/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 *
 * Signals writability (POLLOUT | POLLWRNORM | POLLWRBAND) to sleepers on
 * the socket's wait queue, if there are any.
 */
static void tipc_write_space(struct sock *sk)
{
	const unsigned long events = POLLOUT | POLLWRNORM | POLLWRBAND;
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, events);
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 * @len: the length of messages
 */
1381
static void tipc_data_ready(struct sock *sk)
1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
						POLLRDNORM | POLLRDBAND);
	rcu_read_unlock();
}

1393 1394
/**
 * filter_connect - Handle all incoming messages for a connection-based socket
 * @tsk: TIPC socket
 * @buf: in/out pointer to the message buffer; set to NULL when the message
 *       is consumed here (an empty connection acknowledgement)
 *
 * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise
 */
static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
{
	struct sock *sk = &tsk->sk;
	struct tipc_port *port = &tsk->port;
	struct socket *sock = sk->sk_socket;
	struct tipc_msg *msg = buf_msg(*buf);
	int verdict = -TIPC_ERR_NO_PORT;
	int err;

	/* Multicast is never valid on a connection-based socket */
	if (msg_mcast(msg))
		return verdict;

	switch ((int)sock->state) {
	case SS_CONNECTED:
		/* Accept only connection-based messages sent by peer */
		if (!msg_connected(msg) || !tipc_port_peer_msg(port, msg))
			break;
		if (unlikely(msg_errcode(msg))) {
			sock->state = SS_DISCONNECTING;
			__tipc_port_disconnect(port);
		}
		verdict = TIPC_OK;
		break;
	case SS_CONNECTING:
		/* Accept only ACK or NACK message */
		if (unlikely(msg_errcode(msg))) {
			sock->state = SS_DISCONNECTING;
			sk->sk_err = ECONNREFUSED;
			verdict = TIPC_OK;
			break;
		}

		if (unlikely(!msg_connected(msg)))
			break;

		err = auto_connect(tsk, msg);
		if (err) {
			sock->state = SS_DISCONNECTING;
			sk->sk_err = -err;
			verdict = TIPC_OK;
			break;
		}

		/* If an incoming message is an 'ACK-', it should be
		 * discarded here because it doesn't contain useful
		 * data. In addition, we should try to wake up
		 * connect() routine if sleeping.
		 */
		if (msg_data_sz(msg) == 0) {
			kfree_skb(*buf);
			*buf = NULL;
			if (waitqueue_active(sk_sleep(sk)))
				wake_up_interruptible(sk_sleep(sk));
		}
		verdict = TIPC_OK;
		break;
	case SS_LISTENING:
	case SS_UNCONNECTED:
		/* Accept only SYN message */
		if (!msg_connected(msg) && !(msg_errcode(msg)))
			verdict = TIPC_OK;
		break;
	case SS_DISCONNECTING:
		break;
	default:
		pr_err("Unknown socket state %u\n", sock->state);
	}
	return verdict;
}

1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481
/**
 * rcvbuf_limit - get proper overload limit of socket receive queue
 * @sk: socket
 * @buf: message
 *
 * For all connection oriented messages, irrespective of importance,
 * the default overload value (i.e. 67MB) is set as limit.
 *
 * For all connectionless messages, by default new queue limits are
 * as belows:
 *
1482 1483 1484 1485
 * TIPC_LOW_IMPORTANCE       (4 MB)
 * TIPC_MEDIUM_IMPORTANCE    (8 MB)
 * TIPC_HIGH_IMPORTANCE      (16 MB)
 * TIPC_CRITICAL_IMPORTANCE  (32 MB)
1486 1487 1488 1489 1490 1491 1492 1493
 *
 * Returns overload limit according to corresponding message importance
 */
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
{
	struct tipc_msg *msg = buf_msg(buf);

	if (msg_connected(msg))
1494 1495 1496 1497
		return sysctl_tipc_rmem[2];

	return sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE <<
		msg_importance(msg);
1498 1499
}

1500
/**
1501 1502
 * filter_rcv - validate incoming message
 * @sk: socket
P
Per Liden 已提交
1503
 * @buf: message
1504
 *
1505 1506 1507 1508
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
 * Called with socket lock already taken; port lock may also be taken.
1509
 *
1510
 * Returns 0 (TIPC_OK) if message was consumed, -TIPC error code if message
1511
 * to be rejected, 1 (TIPC_FWD_MSG) if (CONN_MANAGER) message to be forwarded
P
Per Liden 已提交
1512
 */
1513
static int filter_rcv(struct sock *sk, struct sk_buff *buf)
P
Per Liden 已提交
1514
{
1515
	struct socket *sock = sk->sk_socket;
1516
	struct tipc_sock *tsk = tipc_sk(sk);
P
Per Liden 已提交
1517
	struct tipc_msg *msg = buf_msg(buf);
1518
	unsigned int limit = rcvbuf_limit(sk, buf);
1519
	u32 onode;
1520
	int rc = TIPC_OK;
P
Per Liden 已提交
1521

1522 1523
	if (unlikely(msg_user(msg) == CONN_MANAGER))
		return tipc_sk_proto_rcv(tsk, &onode, buf);
1524

1525 1526 1527 1528 1529 1530 1531
	if (unlikely(msg_user(msg) == SOCK_WAKEUP)) {
		kfree_skb(buf);
		tsk->link_cong = 0;
		sk->sk_write_space(sk);
		return TIPC_OK;
	}

P
Per Liden 已提交
1532
	/* Reject message if it is wrong sort of message for socket */
1533
	if (msg_type(msg) > TIPC_DIRECT_MSG)
1534
		return -TIPC_ERR_NO_PORT;
1535

P
Per Liden 已提交
1536
	if (sock->state == SS_READY) {
1537
		if (msg_connected(msg))
1538
			return -TIPC_ERR_NO_PORT;
P
Per Liden 已提交
1539
	} else {
1540 1541 1542
		rc = filter_connect(tsk, &buf);
		if (rc != TIPC_OK || buf == NULL)
			return rc;
P
Per Liden 已提交
1543 1544 1545
	}

	/* Reject message if there isn't room to queue it */
1546
	if (sk_rmem_alloc_get(sk) + buf->truesize >= limit)
1547
		return -TIPC_ERR_OVERLOAD;
P
Per Liden 已提交
1548

1549
	/* Enqueue message */
Y
Ying Xue 已提交
1550
	TIPC_SKB_CB(buf)->handle = NULL;
1551
	__skb_queue_tail(&sk->sk_receive_queue, buf);
1552
	skb_set_owner_r(buf, sk);
1553

1554
	sk->sk_data_ready(sk);
1555 1556
	return TIPC_OK;
}
P
Per Liden 已提交
1557

1558
/**
1559
 * tipc_backlog_rcv - handle incoming message from backlog queue
1560 1561 1562 1563 1564 1565 1566
 * @sk: socket
 * @buf: message
 *
 * Caller must hold socket lock, but not port lock.
 *
 * Returns 0
 */
1567
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf)
1568
{
1569
	int rc;
1570
	u32 onode;
1571
	struct tipc_sock *tsk = tipc_sk(sk);
1572
	uint truesize = buf->truesize;
1573

1574
	rc = filter_rcv(sk, buf);
1575

1576 1577 1578 1579 1580 1581 1582 1583 1584
	if (likely(!rc)) {
		if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT)
			atomic_add(truesize, &tsk->dupl_rcvcnt);
		return 0;
	}

	if ((rc < 0) && !tipc_msg_reverse(buf, &onode, -rc))
		return 0;

1585
	tipc_link_xmit(buf, onode, 0);
1586

1587 1588 1589 1590
	return 0;
}

/**
1591
 * tipc_sk_rcv - handle incoming message
1592 1593 1594
 * @buf: buffer containing arriving message
 * Consumes buffer
 * Returns 0 if success, or errno: -EHOSTUNREACH
1595
 */
1596
int tipc_sk_rcv(struct sk_buff *buf)
1597
{
1598 1599 1600 1601
	struct tipc_sock *tsk;
	struct tipc_port *port;
	struct sock *sk;
	u32 dport = msg_destport(buf_msg(buf));
1602
	int rc = TIPC_OK;
1603
	uint limit;
1604
	u32 dnode;
1605

1606
	/* Validate destination and message */
1607 1608
	port = tipc_port_lock(dport);
	if (unlikely(!port)) {
1609
		rc = tipc_msg_eval(buf, &dnode);
1610 1611 1612 1613 1614 1615 1616
		goto exit;
	}

	tsk = tipc_port_to_sock(port);
	sk = &tsk->sk;

	/* Queue message */
1617
	bh_lock_sock(sk);
1618

1619
	if (!sock_owned_by_user(sk)) {
1620
		rc = filter_rcv(sk, buf);
1621
	} else {
1622 1623 1624 1625
		if (sk->sk_backlog.len == 0)
			atomic_set(&tsk->dupl_rcvcnt, 0);
		limit = rcvbuf_limit(sk, buf) + atomic_read(&tsk->dupl_rcvcnt);
		if (sk_add_backlog(sk, buf, limit))
1626
			rc = -TIPC_ERR_OVERLOAD;
1627 1628
	}
	bh_unlock_sock(sk);
1629
	tipc_port_unlock(port);
1630

1631
	if (likely(!rc))
1632 1633
		return 0;
exit:
1634
	if ((rc < 0) && !tipc_msg_reverse(buf, &dnode, -rc))
1635
		return -EHOSTUNREACH;
1636

1637
	tipc_link_xmit(buf, dnode, 0);
1638
	return (rc < 0) ? -EHOSTUNREACH : 0;
P
Per Liden 已提交
1639 1640
}

Y
Ying Xue 已提交
1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662
/**
 * tipc_wait_for_connect - wait for a connection attempt to be resolved
 * @sock: socket structure
 * @timeo_p: in/out pointer to the remaining timeout
 *
 * Returns 0 once the socket is reported to have left SS_CONNECTING, the
 * pending socket error if there is one, -ETIMEDOUT when the timeout has
 * run out, or the signal errno if interrupted
 */
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int resolved;

	for (;;) {
		int rc = sock_error(sk);

		if (rc)
			return rc;
		if (!*timeo_p)
			return -ETIMEDOUT;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		resolved = sk_wait_event(sk, timeo_p,
					 sock->state != SS_CONNECTING);
		finish_wait(sk_sleep(sk), &wait);
		if (resolved)
			return 0;
	}
}

P
Per Liden 已提交
1663
/**
1664
 * tipc_connect - establish a connection to another TIPC port
P
Per Liden 已提交
1665 1666 1667
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
1668
 * @flags: file-related flags associated with socket
P
Per Liden 已提交
1669 1670 1671
 *
 * Returns 0 on success, errno otherwise
 */
1672 1673
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
			int destlen, int flags)
P
Per Liden 已提交
1674
{
1675
	struct sock *sk = sock->sk;
1676 1677
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
Y
Ying Xue 已提交
1678 1679
	long timeout = (flags & O_NONBLOCK) ? 0 : tipc_sk(sk)->conn_timeout;
	socket_state previous;
1680 1681
	int res;

1682 1683
	lock_sock(sk);

1684
	/* For now, TIPC does not allow use of connect() with DGRAM/RDM types */
1685 1686 1687 1688
	if (sock->state == SS_READY) {
		res = -EOPNOTSUPP;
		goto exit;
	}
1689 1690 1691 1692 1693 1694 1695

	/*
	 * Reject connection attempt using multicast address
	 *
	 * Note: send_msg() validates the rest of the address fields,
	 *       so there's no need to do it here
	 */
1696 1697 1698 1699 1700
	if (dst->addrtype == TIPC_ADDR_MCAST) {
		res = -EINVAL;
		goto exit;
	}

Y
Ying Xue 已提交
1701
	previous = sock->state;
1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713
	switch (sock->state) {
	case SS_UNCONNECTED:
		/* Send a 'SYN-' to destination */
		m.msg_name = dest;
		m.msg_namelen = destlen;

		/* If connect is in non-blocking case, set MSG_DONTWAIT to
		 * indicate send_msg() is never blocked.
		 */
		if (!timeout)
			m.msg_flags = MSG_DONTWAIT;

1714
		res = tipc_sendmsg(NULL, sock, &m, 0);
1715 1716 1717 1718 1719 1720 1721 1722 1723
		if ((res < 0) && (res != -EWOULDBLOCK))
			goto exit;

		/* Just entered SS_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		res = -EINPROGRESS;
	case SS_CONNECTING:
Y
Ying Xue 已提交
1724 1725 1726 1727 1728 1729 1730
		if (previous == SS_CONNECTING)
			res = -EALREADY;
		if (!timeout)
			goto exit;
		timeout = msecs_to_jiffies(timeout);
		/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
		res = tipc_wait_for_connect(sock, &timeout);
1731 1732 1733 1734 1735 1736
		break;
	case SS_CONNECTED:
		res = -EISCONN;
		break;
	default:
		res = -EINVAL;
Y
Ying Xue 已提交
1737
		break;
1738
	}
1739 1740
exit:
	release_sock(sk);
1741
	return res;
P
Per Liden 已提交
1742 1743
}

1744
/**
1745
 * tipc_listen - allow socket to listen for incoming connections
P
Per Liden 已提交
1746 1747
 * @sock: socket structure
 * @len: (unused)
1748
 *
P
Per Liden 已提交
1749 1750
 * Returns 0 on success, errno otherwise
 */
1751
static int tipc_listen(struct socket *sock, int len)
P
Per Liden 已提交
1752
{
1753 1754 1755 1756
	struct sock *sk = sock->sk;
	int res;

	lock_sock(sk);
P
Per Liden 已提交
1757

1758
	if (sock->state != SS_UNCONNECTED)
1759 1760 1761 1762 1763 1764 1765 1766
		res = -EINVAL;
	else {
		sock->state = SS_LISTENING;
		res = 0;
	}

	release_sock(sk);
	return res;
P
Per Liden 已提交
1767 1768
}

Y
Ying Xue 已提交
1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782
/**
 * tipc_wait_for_accept - wait for a connection request to be queued
 * @sock: listening socket
 * @timeo: maximum time to wait
 *
 * Called with the socket lock held; the lock is dropped while sleeping and
 * re-taken afterwards.
 *
 * Returns 0 when the receive queue is non-empty, -EINVAL if the socket is
 * no longer listening, the signal errno if interrupted, or -EAGAIN when
 * the timeout has run out
 */
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int rc;

	/* True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	*/
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		if (!skb_queue_empty(&sk->sk_receive_queue)) {
			rc = 0;
			break;
		}
		if (sock->state != SS_LISTENING) {
			rc = -EINVAL;
			break;
		}
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
	}
	finish_wait(sk_sleep(sk), &wait);
	return rc;
}

1805
/**
1806
 * tipc_accept - wait for connection request
P
Per Liden 已提交
1807 1808 1809
 * @sock: listening socket
 * @newsock: new socket that is to be connected
 * @flags: file-related flags associated with socket
1810
 *
P
Per Liden 已提交
1811 1812
 * Returns 0 on success, errno otherwise
 */
1813
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
P
Per Liden 已提交
1814
{
1815
	struct sock *new_sk, *sk = sock->sk;
P
Per Liden 已提交
1816
	struct sk_buff *buf;
1817
	struct tipc_port *new_port;
1818
	struct tipc_msg *msg;
1819
	struct tipc_portid peer;
1820
	u32 new_ref;
Y
Ying Xue 已提交
1821
	long timeo;
1822
	int res;
P
Per Liden 已提交
1823

1824
	lock_sock(sk);
P
Per Liden 已提交
1825

1826 1827
	if (sock->state != SS_LISTENING) {
		res = -EINVAL;
P
Per Liden 已提交
1828 1829
		goto exit;
	}
Y
Ying Xue 已提交
1830 1831 1832 1833
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	res = tipc_wait_for_accept(sock, timeo);
	if (res)
		goto exit;
1834 1835 1836

	buf = skb_peek(&sk->sk_receive_queue);

1837
	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
1838 1839
	if (res)
		goto exit;
P
Per Liden 已提交
1840

1841
	new_sk = new_sock->sk;
1842
	new_port = &tipc_sk(new_sk)->port;
1843
	new_ref = new_port->ref;
1844
	msg = buf_msg(buf);
P
Per Liden 已提交
1845

1846 1847 1848 1849 1850 1851 1852 1853 1854 1855
	/* we lock on new_sk; but lockdep sees the lock on sk */
	lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);

	/*
	 * Reject any stray messages received by new socket
	 * before the socket lock was taken (very, very unlikely)
	 */
	reject_rx_queue(new_sk);

	/* Connect new socket to it's peer */
1856 1857 1858
	peer.ref = msg_origport(msg);
	peer.node = msg_orignode(msg);
	tipc_port_connect(new_ref, &peer);
1859 1860
	new_sock->state = SS_CONNECTED;

1861
	tipc_port_set_importance(new_port, msg_importance(msg));
1862
	if (msg_named(msg)) {
1863 1864
		new_port->conn_type = msg_nametype(msg);
		new_port->conn_instance = msg_nameinst(msg);
P
Per Liden 已提交
1865
	}
1866 1867 1868 1869 1870 1871 1872 1873 1874

	/*
	 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
	 * Respond to 'SYN+' by queuing it on new socket.
	 */
	if (!msg_data_sz(msg)) {
		struct msghdr m = {NULL,};

		advance_rx_queue(sk);
1875
		tipc_send_packet(NULL, new_sock, &m, 0);
1876 1877 1878
	} else {
		__skb_dequeue(&sk->sk_receive_queue);
		__skb_queue_head(&new_sk->sk_receive_queue, buf);
1879
		skb_set_owner_r(buf, new_sk);
1880 1881
	}
	release_sock(new_sk);
P
Per Liden 已提交
1882
exit:
1883
	release_sock(sk);
P
Per Liden 已提交
1884 1885 1886 1887
	return res;
}

/**
1888
 * tipc_shutdown - shutdown socket connection
P
Per Liden 已提交
1889
 * @sock: socket structure
1890
 * @how: direction to close (must be SHUT_RDWR)
P
Per Liden 已提交
1891 1892
 *
 * Terminates connection (if necessary), then purges socket's receive queue.
1893
 *
P
Per Liden 已提交
1894 1895
 * Returns 0 on success, errno otherwise
 */
1896
static int tipc_shutdown(struct socket *sock, int how)
P
Per Liden 已提交
1897
{
1898
	struct sock *sk = sock->sk;
1899 1900
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_port *port = &tsk->port;
P
Per Liden 已提交
1901
	struct sk_buff *buf;
1902
	u32 dnode;
P
Per Liden 已提交
1903 1904
	int res;

1905 1906
	if (how != SHUT_RDWR)
		return -EINVAL;
P
Per Liden 已提交
1907

1908
	lock_sock(sk);
P
Per Liden 已提交
1909 1910

	switch (sock->state) {
1911
	case SS_CONNECTING:
P
Per Liden 已提交
1912 1913 1914
	case SS_CONNECTED:

restart:
1915
		/* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
1916 1917
		buf = __skb_dequeue(&sk->sk_receive_queue);
		if (buf) {
Y
Ying Xue 已提交
1918
			if (TIPC_SKB_CB(buf)->handle != NULL) {
1919
				kfree_skb(buf);
P
Per Liden 已提交
1920 1921
				goto restart;
			}
1922
			tipc_port_disconnect(port->ref);
1923 1924
			if (tipc_msg_reverse(buf, &dnode, TIPC_CONN_SHUTDOWN))
				tipc_link_xmit(buf, dnode, port->ref);
1925
		} else {
1926 1927 1928 1929 1930 1931 1932 1933
			dnode = tipc_port_peernode(port);
			buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
					      TIPC_CONN_MSG, SHORT_H_SIZE,
					      0, dnode, tipc_own_addr,
					      tipc_port_peerport(port),
					      port->ref, TIPC_CONN_SHUTDOWN);
			tipc_link_xmit(buf, dnode, port->ref);
			__tipc_port_disconnect(port);
P
Per Liden 已提交
1934
		}
1935 1936

		sock->state = SS_DISCONNECTING;
P
Per Liden 已提交
1937 1938 1939 1940 1941

		/* fall through */

	case SS_DISCONNECTING:

1942
		/* Discard any unreceived messages */
1943
		__skb_queue_purge(&sk->sk_receive_queue);
1944 1945 1946

		/* Wake up anyone sleeping in poll */
		sk->sk_state_change(sk);
P
Per Liden 已提交
1947 1948 1949 1950 1951 1952 1953
		res = 0;
		break;

	default:
		res = -ENOTCONN;
	}

1954
	release_sock(sk);
P
Per Liden 已提交
1955 1956 1957
	return res;
}

1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001
static void tipc_sk_timeout(unsigned long ref)
{
	struct tipc_port *port = tipc_port_lock(ref);
	struct tipc_sock *tsk;
	struct sock *sk;
	struct sk_buff *buf = NULL;
	struct tipc_msg *msg = NULL;
	u32 peer_port, peer_node;

	if (!port)
		return;

	if (!port->connected) {
		tipc_port_unlock(port);
		return;
	}
	tsk = tipc_port_to_sock(port);
	sk = &tsk->sk;
	bh_lock_sock(sk);
	peer_port = tipc_port_peerport(port);
	peer_node = tipc_port_peernode(port);

	if (port->probing_state == TIPC_CONN_PROBING) {
		/* Previous probe not answered -> self abort */
		buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
				      SHORT_H_SIZE, 0, tipc_own_addr,
				      peer_node, ref, peer_port,
				      TIPC_ERR_NO_PORT);
	} else {
		buf = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE,
				      0, peer_node, tipc_own_addr,
				      peer_port, ref, TIPC_OK);
		port->probing_state = TIPC_CONN_PROBING;
		k_start_timer(&port->timer, port->probing_interval);
	}
	bh_unlock_sock(sk);
	tipc_port_unlock(port);
	if (!buf)
		return;

	msg = buf_msg(buf);
	tipc_link_xmit(buf, msg_destnode(msg),	msg_link_selector(msg));
}

P
Per Liden 已提交
2002
/**
2003
 * tipc_setsockopt - set socket option
P
Per Liden 已提交
2004 2005 2006 2007 2008
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: pointer to new option value
 * @ol: length of option value
2009 2010
 *
 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
P
Per Liden 已提交
2011
 * (to ease compatibility).
2012
 *
P
Per Liden 已提交
2013 2014
 * Returns 0 on success, errno otherwise
 */
2015 2016
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, unsigned int ol)
P
Per Liden 已提交
2017
{
2018
	struct sock *sk = sock->sk;
2019 2020
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_port *port = &tsk->port;
P
Per Liden 已提交
2021 2022 2023
	u32 value;
	int res;

2024 2025
	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return 0;
P
Per Liden 已提交
2026 2027 2028 2029
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
	if (ol < sizeof(value))
		return -EINVAL;
2030 2031
	res = get_user(value, (u32 __user *)ov);
	if (res)
P
Per Liden 已提交
2032 2033
		return res;

2034
	lock_sock(sk);
2035

P
Per Liden 已提交
2036 2037
	switch (opt) {
	case TIPC_IMPORTANCE:
2038
		res = tipc_port_set_importance(port, value);
P
Per Liden 已提交
2039 2040 2041
		break;
	case TIPC_SRC_DROPPABLE:
		if (sock->type != SOCK_STREAM)
2042
			tipc_port_set_unreliable(port, value);
2043
		else
P
Per Liden 已提交
2044 2045 2046
			res = -ENOPROTOOPT;
		break;
	case TIPC_DEST_DROPPABLE:
2047
		tipc_port_set_unreturnable(port, value);
P
Per Liden 已提交
2048 2049
		break;
	case TIPC_CONN_TIMEOUT:
2050
		tipc_sk(sk)->conn_timeout = value;
2051
		/* no need to set "res", since already 0 at this point */
P
Per Liden 已提交
2052 2053 2054 2055 2056
		break;
	default:
		res = -EINVAL;
	}

2057 2058
	release_sock(sk);

P
Per Liden 已提交
2059 2060 2061 2062
	return res;
}

/**
2063
 * tipc_getsockopt - get socket option
P
Per Liden 已提交
2064 2065 2066 2067 2068
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: receptacle for option value
 * @ol: receptacle for length of option value
2069 2070
 *
 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
P
Per Liden 已提交
2071
 * (to ease compatibility).
2072
 *
P
Per Liden 已提交
2073 2074
 * Returns 0 on success, errno otherwise
 */
2075 2076
static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, int __user *ol)
P
Per Liden 已提交
2077
{
2078
	struct sock *sk = sock->sk;
2079 2080
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_port *port = &tsk->port;
2081
	int len;
P
Per Liden 已提交
2082
	u32 value;
2083
	int res;
P
Per Liden 已提交
2084

2085 2086
	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return put_user(0, ol);
P
Per Liden 已提交
2087 2088
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
2089 2090
	res = get_user(len, ol);
	if (res)
2091
		return res;
P
Per Liden 已提交
2092

2093
	lock_sock(sk);
P
Per Liden 已提交
2094 2095 2096

	switch (opt) {
	case TIPC_IMPORTANCE:
2097
		value = tipc_port_importance(port);
P
Per Liden 已提交
2098 2099
		break;
	case TIPC_SRC_DROPPABLE:
2100
		value = tipc_port_unreliable(port);
P
Per Liden 已提交
2101 2102
		break;
	case TIPC_DEST_DROPPABLE:
2103
		value = tipc_port_unreturnable(port);
P
Per Liden 已提交
2104 2105
		break;
	case TIPC_CONN_TIMEOUT:
2106
		value = tipc_sk(sk)->conn_timeout;
2107
		/* no need to set "res", since already 0 at this point */
P
Per Liden 已提交
2108
		break;
2109
	case TIPC_NODE_RECVQ_DEPTH:
2110
		value = 0; /* was tipc_queue_size, now obsolete */
2111
		break;
2112
	case TIPC_SOCK_RECVQ_DEPTH:
2113 2114
		value = skb_queue_len(&sk->sk_receive_queue);
		break;
P
Per Liden 已提交
2115 2116 2117 2118
	default:
		res = -EINVAL;
	}

2119 2120
	release_sock(sk);

2121 2122
	if (res)
		return res;	/* "get" failed */
P
Per Liden 已提交
2123

2124 2125 2126 2127 2128 2129 2130
	if (len < sizeof(value))
		return -EINVAL;

	if (copy_to_user(ov, &value, sizeof(value)))
		return -EFAULT;

	return put_user(sizeof(value), ol);
P
Per Liden 已提交
2131 2132
}

2133
static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg)
E
Erik Hugne 已提交
2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153
{
	struct tipc_sioc_ln_req lnr;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case SIOCGETLINKNAME:
		if (copy_from_user(&lnr, argp, sizeof(lnr)))
			return -EFAULT;
		if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer,
					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
			if (copy_to_user(argp, &lnr, sizeof(lnr)))
				return -EFAULT;
			return 0;
		}
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

2154 2155
/* Protocol switches for the various types of TIPC sockets */

2156
static const struct proto_ops msg_ops = {
2157
	.owner		= THIS_MODULE,
P
Per Liden 已提交
2158
	.family		= AF_TIPC,
2159 2160 2161
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
2162
	.socketpair	= sock_no_socketpair,
2163
	.accept		= sock_no_accept,
2164 2165
	.getname	= tipc_getname,
	.poll		= tipc_poll,
E
Erik Hugne 已提交
2166
	.ioctl		= tipc_ioctl,
2167
	.listen		= sock_no_listen,
2168 2169 2170 2171 2172
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendmsg,
	.recvmsg	= tipc_recvmsg,
2173 2174
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
P
Per Liden 已提交
2175 2176
};

2177
static const struct proto_ops packet_ops = {
2178
	.owner		= THIS_MODULE,
P
Per Liden 已提交
2179
	.family		= AF_TIPC,
2180 2181 2182
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
2183
	.socketpair	= sock_no_socketpair,
2184 2185 2186
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
E
Erik Hugne 已提交
2187
	.ioctl		= tipc_ioctl,
2188 2189 2190 2191 2192 2193
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_packet,
	.recvmsg	= tipc_recvmsg,
2194 2195
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
P
Per Liden 已提交
2196 2197
};

2198
static const struct proto_ops stream_ops = {
2199
	.owner		= THIS_MODULE,
P
Per Liden 已提交
2200
	.family		= AF_TIPC,
2201 2202 2203
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
2204
	.socketpair	= sock_no_socketpair,
2205 2206 2207
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
E
Erik Hugne 已提交
2208
	.ioctl		= tipc_ioctl,
2209 2210 2211 2212 2213 2214
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_stream,
	.recvmsg	= tipc_recv_stream,
2215 2216
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
P
Per Liden 已提交
2217 2218
};

2219
static const struct net_proto_family tipc_family_ops = {
2220
	.owner		= THIS_MODULE,
P
Per Liden 已提交
2221
	.family		= AF_TIPC,
2222
	.create		= tipc_sk_create
P
Per Liden 已提交
2223 2224 2225 2226 2227
};

static struct proto tipc_proto = {
	.name		= "TIPC",
	.owner		= THIS_MODULE,
2228 2229
	.obj_size	= sizeof(struct tipc_sock),
	.sysctl_rmem	= sysctl_tipc_rmem
P
Per Liden 已提交
2230 2231
};

2232 2233 2234 2235 2236 2237
/*
 * Same settings as tipc_proto but without an .owner entry.
 * NOTE(review): presumably meant for sockets created from inside the
 * kernel - confirm against the socket creation path.
 */
static struct proto tipc_proto_kern = {
	.obj_size	= sizeof(struct tipc_sock),
	.sysctl_rmem	= sysctl_tipc_rmem,
	.name		= "TIPC",
};

P
Per Liden 已提交
2238
/**
2239
 * tipc_socket_init - initialize TIPC socket interface
2240
 *
P
Per Liden 已提交
2241 2242
 * Returns 0 on success, errno otherwise
 */
2243
int tipc_socket_init(void)
P
Per Liden 已提交
2244 2245 2246
{
	int res;

2247
	res = proto_register(&tipc_proto, 1);
P
Per Liden 已提交
2248
	if (res) {
2249
		pr_err("Failed to register TIPC protocol type\n");
P
Per Liden 已提交
2250 2251 2252 2253 2254
		goto out;
	}

	res = sock_register(&tipc_family_ops);
	if (res) {
2255
		pr_err("Failed to register TIPC socket type\n");
P
Per Liden 已提交
2256 2257 2258 2259 2260 2261 2262 2263
		proto_unregister(&tipc_proto);
		goto out;
	}
 out:
	return res;
}

/**
2264
 * tipc_socket_stop - stop TIPC socket interface
P
Per Liden 已提交
2265
 */
2266
void tipc_socket_stop(void)
P
Per Liden 已提交
2267 2268 2269 2270
{
	sock_unregister(tipc_family_ops.family);
	proto_unregister(&tipc_proto);
}