socket.c 72.4 KB
Newer Older
P
Per Liden 已提交
1
/*
2
 * net/tipc/socket.c: TIPC socket API
3
 *
4
 * Copyright (c) 2001-2007, 2012-2015, Ericsson AB
5
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
P
Per Liden 已提交
6 7
 * All rights reserved.
 *
P
Per Liden 已提交
8
 * Redistribution and use in source and binary forms, with or without
P
Per Liden 已提交
9 10
 * modification, are permitted provided that the following conditions are met:
 *
P
Per Liden 已提交
11 12 13 14 15 16 17 18
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
P
Per Liden 已提交
19
 *
P
Per Liden 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
P
Per Liden 已提交
34 35 36
 * POSSIBILITY OF SUCH DAMAGE.
 */

37
#include <linux/rhashtable.h>
P
Per Liden 已提交
38
#include "core.h"
39
#include "name_table.h"
E
Erik Hugne 已提交
40
#include "node.h"
41
#include "link.h"
42
#include "name_distr.h"
43
#include "socket.h"
44

45 46
#define SS_LISTENING		-1	/* socket is listening */
#define SS_READY		-2	/* socket is connectionless */
P
Per Liden 已提交
47

48
#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
49
#define CONN_PROBING_INTERVAL	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
50 51 52 53 54
#define TIPC_FWD_MSG		1
#define TIPC_CONN_OK		0
#define TIPC_CONN_PROBING	1
#define TIPC_MAX_PORT		0xffffffff
#define TIPC_MIN_PORT		1
55 56 57 58 59 60 61 62 63

/**
 * struct tipc_sock - TIPC socket structure
 * @sk: socket - interacts with 'port' and with user via the socket API
 * @connected: non-zero if port is currently connected to a peer port
 * @conn_type: TIPC type used when connection was established
 * @conn_instance: TIPC instance used when connection was established
 * @published: non-zero if port has one or more associated names
 * @max_pkt: maximum packet size "hint" used when building messages sent by port
64
 * @portid: unique port identity in TIPC socket hash table
65 66 67 68 69
 * @phdr: preformatted message header used when sending messages
 * @port_list: adjacent ports in TIPC's global list of ports
 * @publications: list of publications for port
 * @pub_count: total # of publications port has made during its lifetime
 * @probing_state:
70
 * @probing_intv:
71 72 73 74 75
 * @conn_timeout: the time we can wait for an unresponded setup request
 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
 * @link_cong: non-zero if owner must sleep because of link congestion
 * @sent_unacked: # messages sent by socket, and not yet acked by peer
 * @rcv_unacked: # messages read by user, but not yet acked back to peer
76
 * @remote: 'connected' peer for dgram/rdm
77 78
 * @node: hash table node
 * @rcu: rcu struct for tipc_sock
79 80 81 82 83 84 85 86
 */
struct tipc_sock {
	struct sock sk;
	int connected;
	u32 conn_type;
	u32 conn_instance;
	int published;
	u32 max_pkt;
87
	u32 portid;
88 89 90 91 92
	struct tipc_msg phdr;
	struct list_head sock_list;
	struct list_head publications;
	u32 pub_count;
	u32 probing_state;
93
	unsigned long probing_intv;
94 95 96 97 98
	uint conn_timeout;
	atomic_t dupl_rcvcnt;
	bool link_cong;
	uint sent_unacked;
	uint rcv_unacked;
99
	struct sockaddr_tipc remote;
100 101
	struct rhash_head node;
	struct rcu_head rcu;
102
};
P
Per Liden 已提交
103

104
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
105
static void tipc_data_ready(struct sock *sk);
106
static void tipc_write_space(struct sock *sk);
107 108
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
109
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p);
110
static void tipc_sk_timeout(unsigned long data);
111
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
112
			   struct tipc_name_seq const *seq);
113
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
114
			    struct tipc_name_seq const *seq);
115
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
116 117
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
118 119 120
static int __tipc_send_stream(struct socket *sock, struct msghdr *m,
			      size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
P
Per Liden 已提交
121

122 123 124
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
P
Per Liden 已提交
125 126
static struct proto tipc_proto;

127 128 129 130 131 132 133 134
static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
	[TIPC_NLA_SOCK_UNSPEC]		= { .type = NLA_UNSPEC },
	[TIPC_NLA_SOCK_ADDR]		= { .type = NLA_U32 },
	[TIPC_NLA_SOCK_REF]		= { .type = NLA_U32 },
	[TIPC_NLA_SOCK_CON]		= { .type = NLA_NESTED },
	[TIPC_NLA_SOCK_HAS_PUBL]	= { .type = NLA_FLAG }
};

135 136
static const struct rhashtable_params tsk_rht_params;

137
/*
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
 * Revised TIPC socket locking policy:
 *
 * Most socket operations take the standard socket lock when they start
 * and hold it until they finish (or until they need to sleep).  Acquiring
 * this lock grants the owner exclusive access to the fields of the socket
 * data structures, with the exception of the backlog queue.  A few socket
 * operations can be done without taking the socket lock because they only
 * read socket information that never changes during the life of the socket.
 *
 * Socket operations may acquire the lock for the associated TIPC port if they
 * need to perform an operation on the port.  If any routine needs to acquire
 * both the socket lock and the port lock it must take the socket lock first
 * to avoid the risk of deadlock.
 *
 * The dispatcher handling incoming messages cannot grab the socket lock in
 * the standard fashion, since invoked it runs at the BH level and cannot block.
 * Instead, it checks to see if the socket lock is currently owned by someone,
 * and either handles the message itself or adds it to the socket's backlog
 * queue; in the latter case the queued message is processed once the process
 * owning the socket lock releases it.
 *
 * NOTE: Releasing the socket lock while an operation is sleeping overcomes
 * the problem of a blocked socket operation preventing any other operations
 * from occurring.  However, applications must be careful if they have
 * multiple threads trying to send (or receive) on the same socket, as these
 * operations might interfere with each other.  For example, doing a connect
 * and a receive at the same time might allow the receive to consume the
 * ACK message meant for the connect.  While additional work could be done
 * to try and overcome this, it doesn't seem to be worthwhile at the present.
 *
 * NOTE: Releasing the socket lock while an operation is sleeping also ensures
 * that another operation that must be performed in a non-blocking manner is
 * not delayed for very long because the lock has already been taken.
 *
 * NOTE: This code assumes that certain fields of a port/socket pair are
 * constant over its lifetime; such fields can be examined without taking
 * the socket lock and/or port lock, and do not need to be re-read even
 * after resuming processing after waiting.  These fields include:
 *   - socket type
 *   - pointer to socket sk structure (aka tipc_sock structure)
 *   - pointer to port structure
 *   - port reference
 */

182 183 184 185 186
static u32 tsk_own_node(struct tipc_sock *tsk)
{
	return msg_prevnode(&tsk->phdr);
}

187
static u32 tsk_peer_node(struct tipc_sock *tsk)
188
{
189
	return msg_destnode(&tsk->phdr);
190 191
}

192
static u32 tsk_peer_port(struct tipc_sock *tsk)
193
{
194
	return msg_destport(&tsk->phdr);
195 196
}

197
static  bool tsk_unreliable(struct tipc_sock *tsk)
198
{
199
	return msg_src_droppable(&tsk->phdr) != 0;
200 201
}

202
static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
203
{
204
	msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0);
205 206
}

207
static bool tsk_unreturnable(struct tipc_sock *tsk)
208
{
209
	return msg_dest_droppable(&tsk->phdr) != 0;
210 211
}

212
static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
213
{
214
	msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0);
215 216
}

217
static int tsk_importance(struct tipc_sock *tsk)
218
{
219
	return msg_importance(&tsk->phdr);
220 221
}

222
static int tsk_set_importance(struct tipc_sock *tsk, int imp)
223 224 225
{
	if (imp > TIPC_CRITICAL_IMPORTANCE)
		return -EINVAL;
226
	msg_set_importance(&tsk->phdr, (u32)imp);
227 228
	return 0;
}
229

230 231 232 233 234 235 236 237 238 239
static struct tipc_sock *tipc_sk(const struct sock *sk)
{
	return container_of(sk, struct tipc_sock, sk);
}

static int tsk_conn_cong(struct tipc_sock *tsk)
{
	return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN;
}

240
/**
241
 * tsk_advance_rx_queue - discard first buffer in socket receive queue
242 243
 *
 * Caller must hold socket lock
P
Per Liden 已提交
244
 */
245
static void tsk_advance_rx_queue(struct sock *sk)
P
Per Liden 已提交
246
{
247
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
P
Per Liden 已提交
248 249 250
}

/**
251
 * tsk_rej_rx_queue - reject all buffers in socket receive queue
252 253
 *
 * Caller must hold socket lock
P
Per Liden 已提交
254
 */
255
static void tsk_rej_rx_queue(struct sock *sk)
P
Per Liden 已提交
256
{
257
	struct sk_buff *skb;
258
	u32 dnode;
259
	u32 own_node = tsk_own_node(tipc_sk(sk));
260

261
	while ((skb = __skb_dequeue(&sk->sk_receive_queue))) {
262 263
		if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_ERR_NO_PORT))
			tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0);
264
	}
P
Per Liden 已提交
265 266
}

267
/* tsk_peer_msg - verify if message was sent by connected port's peer
J
Jon Paul Maloy 已提交
268 269 270 271
 *
 * Handles cases where the node's network address has changed from
 * the default of <0.0.0> to its configured setting.
 */
272
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
J
Jon Paul Maloy 已提交
273
{
274
	struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id);
275
	u32 peer_port = tsk_peer_port(tsk);
J
Jon Paul Maloy 已提交
276 277 278
	u32 orig_node;
	u32 peer_node;

279
	if (unlikely(!tsk->connected))
J
Jon Paul Maloy 已提交
280 281 282 283 284 285
		return false;

	if (unlikely(msg_origport(msg) != peer_port))
		return false;

	orig_node = msg_orignode(msg);
286
	peer_node = tsk_peer_node(tsk);
J
Jon Paul Maloy 已提交
287 288 289 290

	if (likely(orig_node == peer_node))
		return true;

291
	if (!orig_node && (peer_node == tn->own_addr))
J
Jon Paul Maloy 已提交
292 293
		return true;

294
	if (!peer_node && (orig_node == tn->own_addr))
J
Jon Paul Maloy 已提交
295 296 297 298 299
		return true;

	return false;
}

P
Per Liden 已提交
300
/**
301
 * tipc_sk_create - create a TIPC socket
302
 * @net: network namespace (must be default network)
P
Per Liden 已提交
303 304
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
305
 * @kern: caused by kernel or by userspace?
306
 *
307 308
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
P
Per Liden 已提交
309 310 311
 *
 * Returns 0 on success, errno otherwise
 */
312 313
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
P
Per Liden 已提交
314
{
315
	struct tipc_net *tn;
316 317
	const struct proto_ops *ops;
	socket_state state;
P
Per Liden 已提交
318
	struct sock *sk;
319
	struct tipc_sock *tsk;
320
	struct tipc_msg *msg;
321 322

	/* Validate arguments */
P
Per Liden 已提交
323 324 325 326 327
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	switch (sock->type) {
	case SOCK_STREAM:
328 329
		ops = &stream_ops;
		state = SS_UNCONNECTED;
P
Per Liden 已提交
330 331
		break;
	case SOCK_SEQPACKET:
332 333
		ops = &packet_ops;
		state = SS_UNCONNECTED;
P
Per Liden 已提交
334 335 336
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
337 338
		ops = &msg_ops;
		state = SS_READY;
P
Per Liden 已提交
339
		break;
340 341
	default:
		return -EPROTOTYPE;
P
Per Liden 已提交
342 343
	}

344
	/* Allocate socket's protocol area */
Y
Ying Xue 已提交
345
	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
346
	if (sk == NULL)
P
Per Liden 已提交
347 348
		return -ENOMEM;

349
	tsk = tipc_sk(sk);
350 351 352
	tsk->max_pkt = MAX_PKT_DEFAULT;
	INIT_LIST_HEAD(&tsk->publications);
	msg = &tsk->phdr;
353 354
	tn = net_generic(sock_net(sk), tipc_net_id);
	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
355
		      NAMED_H_SIZE, 0);
P
Per Liden 已提交
356

357 358 359 360
	/* Finish initializing socket data structures */
	sock->ops = ops;
	sock->state = state;
	sock_init_data(sock, sk);
361 362 363 364 365
	if (tipc_sk_insert(tsk)) {
		pr_warn("Socket create failed; port numbrer exhausted\n");
		return -EINVAL;
	}
	msg_set_origport(msg, tsk->portid);
366
	setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
367
	sk->sk_backlog_rcv = tipc_backlog_rcv;
368
	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
369 370
	sk->sk_data_ready = tipc_data_ready;
	sk->sk_write_space = tipc_write_space;
371
	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
372
	tsk->sent_unacked = 0;
373
	atomic_set(&tsk->dupl_rcvcnt, 0);
374

375
	if (sock->state == SS_READY) {
376
		tsk_set_unreturnable(tsk, true);
377
		if (sock->type == SOCK_DGRAM)
378
			tsk_set_unreliable(tsk, true);
379
	}
P
Per Liden 已提交
380 381 382
	return 0;
}

383 384 385 386 387 388 389
static void tipc_sk_callback(struct rcu_head *head)
{
	struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);

	sock_put(&tsk->sk);
}

P
Per Liden 已提交
390
/**
391
 * tipc_release - destroy a TIPC socket
P
Per Liden 已提交
392 393 394 395 396 397 398
 * @sock: socket to destroy
 *
 * This routine cleans up any messages that are still queued on the socket.
 * For DGRAM and RDM socket types, all queued messages are rejected.
 * For SEQPACKET and STREAM socket types, the first message is rejected
 * and any others are discarded.  (If the first message on a STREAM socket
 * is partially-read, it is discarded and the next one is rejected instead.)
399
 *
P
Per Liden 已提交
400 401 402 403 404 405
 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 * are returned or discarded according to the "destination droppable" setting
 * specified for the message by the sender.
 *
 * Returns 0 on success, errno otherwise
 */
406
static int tipc_release(struct socket *sock)
P
Per Liden 已提交
407 408
{
	struct sock *sk = sock->sk;
409
	struct net *net;
410
	struct tipc_sock *tsk;
411
	struct sk_buff *skb;
412
	u32 dnode, probing_state;
P
Per Liden 已提交
413

414 415 416 417 418
	/*
	 * Exit if socket isn't fully initialized (occurs when a failed accept()
	 * releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
P
Per Liden 已提交
419
		return 0;
420

421
	net = sock_net(sk);
422
	tsk = tipc_sk(sk);
423 424 425 426 427 428
	lock_sock(sk);

	/*
	 * Reject all unreceived messages, except on an active connection
	 * (which disconnects locally & sends a 'FIN+' to peer)
	 */
429
	dnode = tsk_peer_node(tsk);
P
Per Liden 已提交
430
	while (sock->state != SS_DISCONNECTING) {
431 432
		skb = __skb_dequeue(&sk->sk_receive_queue);
		if (skb == NULL)
P
Per Liden 已提交
433
			break;
434 435
		if (TIPC_SKB_CB(skb)->handle != NULL)
			kfree_skb(skb);
436 437 438 439
		else {
			if ((sock->state == SS_CONNECTING) ||
			    (sock->state == SS_CONNECTED)) {
				sock->state = SS_DISCONNECTING;
440
				tsk->connected = 0;
441
				tipc_node_remove_conn(net, dnode, tsk->portid);
442
			}
443
			if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode,
444
					     TIPC_ERR_NO_PORT))
445
				tipc_link_xmit_skb(net, skb, dnode, 0);
446
		}
P
Per Liden 已提交
447 448
	}

449
	tipc_sk_withdraw(tsk, 0, NULL);
450
	probing_state = tsk->probing_state;
451 452
	if (del_timer_sync(&sk->sk_timer) &&
	    probing_state != TIPC_CONN_PROBING)
453
		sock_put(sk);
454
	tipc_sk_remove(tsk);
455
	if (tsk->connected) {
456
		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
457
				      TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
458
				      tsk_own_node(tsk), tsk_peer_port(tsk),
459
				      tsk->portid, TIPC_ERR_NO_PORT);
460
		if (skb)
461 462
			tipc_link_xmit_skb(net, skb, dnode, tsk->portid);
		tipc_node_remove_conn(net, dnode, tsk->portid);
463
	}
P
Per Liden 已提交
464

465
	/* Discard any remaining (connection-based) messages in receive queue */
466
	__skb_queue_purge(&sk->sk_receive_queue);
P
Per Liden 已提交
467

468 469 470
	/* Reject any messages that accumulated in backlog queue */
	sock->state = SS_DISCONNECTING;
	release_sock(sk);
471 472

	call_rcu(&tsk->rcu, tipc_sk_callback);
473
	sock->sk = NULL;
P
Per Liden 已提交
474

475
	return 0;
P
Per Liden 已提交
476 477 478
}

/**
479
 * tipc_bind - associate or disassocate TIPC name(s) with a socket
P
Per Liden 已提交
480 481 482
 * @sock: socket structure
 * @uaddr: socket address describing name(s) and desired operation
 * @uaddr_len: size of socket address data structure
483
 *
P
Per Liden 已提交
484 485 486
 * Name and name sequence binding is indicated using a positive scope value;
 * a negative scope value unbinds the specified name.  Specifying no name
 * (i.e. a socket address length of 0) unbinds all names from the socket.
487
 *
P
Per Liden 已提交
488
 * Returns 0 on success, errno otherwise
489 490 491
 *
 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 *       access any non-constant socket information.
P
Per Liden 已提交
492
 */
493 494
static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
		     int uaddr_len)
P
Per Liden 已提交
495
{
496
	struct sock *sk = sock->sk;
P
Per Liden 已提交
497
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
498
	struct tipc_sock *tsk = tipc_sk(sk);
499
	int res = -EINVAL;
P
Per Liden 已提交
500

501 502
	lock_sock(sk);
	if (unlikely(!uaddr_len)) {
503
		res = tipc_sk_withdraw(tsk, 0, NULL);
504 505
		goto exit;
	}
506

507 508 509 510 511 512 513 514
	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
		res = -EINVAL;
		goto exit;
	}
	if (addr->family != AF_TIPC) {
		res = -EAFNOSUPPORT;
		goto exit;
	}
P
Per Liden 已提交
515 516 517

	if (addr->addrtype == TIPC_ADDR_NAME)
		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
518 519 520 521
	else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
		res = -EAFNOSUPPORT;
		goto exit;
	}
522

523
	if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
524
	    (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
525 526 527 528
	    (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
		res = -EACCES;
		goto exit;
	}
529

530
	res = (addr->scope > 0) ?
531 532
		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
533 534 535
exit:
	release_sock(sk);
	return res;
P
Per Liden 已提交
536 537
}

538
/**
539
 * tipc_getname - get port ID of socket or peer socket
P
Per Liden 已提交
540 541 542
 * @sock: socket structure
 * @uaddr: area for returned socket address
 * @uaddr_len: area for returned length of socket address
543
 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
544
 *
P
Per Liden 已提交
545
 * Returns 0 on success, errno otherwise
546
 *
547 548
 * NOTE: This routine doesn't need to take the socket lock since it only
 *       accesses socket information that is unchanging (or which changes in
549
 *       a completely predictable manner).
P
Per Liden 已提交
550
 */
551 552
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
			int *uaddr_len, int peer)
P
Per Liden 已提交
553 554
{
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
555
	struct tipc_sock *tsk = tipc_sk(sock->sk);
556
	struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
P
Per Liden 已提交
557

558
	memset(addr, 0, sizeof(*addr));
559
	if (peer) {
560 561 562
		if ((sock->state != SS_CONNECTED) &&
			((peer != 2) || (sock->state != SS_DISCONNECTING)))
			return -ENOTCONN;
563 564
		addr->addr.id.ref = tsk_peer_port(tsk);
		addr->addr.id.node = tsk_peer_node(tsk);
565
	} else {
566
		addr->addr.id.ref = tsk->portid;
567
		addr->addr.id.node = tn->own_addr;
568
	}
P
Per Liden 已提交
569 570 571 572 573 574 575

	*uaddr_len = sizeof(*addr);
	addr->addrtype = TIPC_ADDR_ID;
	addr->family = AF_TIPC;
	addr->scope = 0;
	addr->addr.name.domain = 0;

576
	return 0;
P
Per Liden 已提交
577 578 579
}

/**
580
 * tipc_poll - read and possibly block on pollmask
P
Per Liden 已提交
581 582 583 584
 * @file: file structure associated with the socket
 * @sock: socket for which to calculate the poll bits
 * @wait: ???
 *
585 586 587 588 589 590 591 592 593
 * Returns pollmask value
 *
 * COMMENTARY:
 * It appears that the usual socket locking mechanisms are not useful here
 * since the pollmask info is potentially out-of-date the moment this routine
 * exits.  TCP and other protocols seem to rely on higher level poll routines
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
 * TIPC sets the returned events as follows:
594 595 596 597
 *
 * socket state		flags set
 * ------------		---------
 * unconnected		no read flags
598
 *			POLLOUT if port is not congested
599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617
 *
 * connecting		POLLIN/POLLRDNORM if ACK/NACK in rx queue
 *			no write flags
 *
 * connected		POLLIN/POLLRDNORM if data in rx queue
 *			POLLOUT if port is not congested
 *
 * disconnecting	POLLIN/POLLRDNORM/POLLHUP
 *			no write flags
 *
 * listening		POLLIN if SYN in rx queue
 *			no write flags
 *
 * ready		POLLIN/POLLRDNORM if data in rx queue
 * [connectionless]	POLLOUT (since port cannot be congested)
 *
 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 * imply that the operation will succeed, merely that it should be performed
 * and will not block.
P
Per Liden 已提交
618
 */
619 620
static unsigned int tipc_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
P
Per Liden 已提交
621
{
622
	struct sock *sk = sock->sk;
623
	struct tipc_sock *tsk = tipc_sk(sk);
624
	u32 mask = 0;
625

626
	sock_poll_wait(file, sk_sleep(sk), wait);
627

628
	switch ((int)sock->state) {
629
	case SS_UNCONNECTED:
630
		if (!tsk->link_cong)
631 632
			mask |= POLLOUT;
		break;
633 634
	case SS_READY:
	case SS_CONNECTED:
635
		if (!tsk->link_cong && !tsk_conn_cong(tsk))
636 637 638 639 640 641 642 643 644 645 646
			mask |= POLLOUT;
		/* fall thru' */
	case SS_CONNECTING:
	case SS_LISTENING:
		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= (POLLIN | POLLRDNORM);
		break;
	case SS_DISCONNECTING:
		mask = (POLLIN | POLLRDNORM | POLLHUP);
		break;
	}
647 648

	return mask;
P
Per Liden 已提交
649 650
}

651 652 653 654
/**
 * tipc_sendmcast - send multicast message
 * @sock: socket structure
 * @seq: destination address
655
 * @msg: message to send
656 657 658 659 660 661 662
 * @dsz: total length of message data
 * @timeo: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
663
			  struct msghdr *msg, size_t dsz, long timeo)
664 665
{
	struct sock *sk = sock->sk;
666
	struct tipc_sock *tsk = tipc_sk(sk);
667
	struct net *net = sock_net(sk);
668
	struct tipc_msg *mhdr = &tsk->phdr;
669
	struct sk_buff_head *pktchain = &sk->sk_write_queue;
A
Al Viro 已提交
670
	struct iov_iter save = msg->msg_iter;
671 672 673 674 675 676 677 678 679 680 681 682 683 684
	uint mtu;
	int rc;

	msg_set_type(mhdr, TIPC_MCAST_MSG);
	msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE);
	msg_set_destport(mhdr, 0);
	msg_set_destnode(mhdr, 0);
	msg_set_nametype(mhdr, seq->type);
	msg_set_namelower(mhdr, seq->lower);
	msg_set_nameupper(mhdr, seq->upper);
	msg_set_hdr_sz(mhdr, MCAST_H_SIZE);

new_mtu:
	mtu = tipc_bclink_get_mtu();
685
	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
686 687 688 689
	if (unlikely(rc < 0))
		return rc;

	do {
690
		rc = tipc_bclink_xmit(net, pktchain);
691 692 693 694
		if (likely(rc >= 0)) {
			rc = dsz;
			break;
		}
A
Al Viro 已提交
695 696
		if (rc == -EMSGSIZE) {
			msg->msg_iter = save;
697
			goto new_mtu;
A
Al Viro 已提交
698
		}
699 700
		if (rc != -ELINKCONG)
			break;
701
		tipc_sk(sk)->link_cong = 1;
702 703
		rc = tipc_wait_for_sndmsg(sock, &timeo);
		if (rc)
704
			__skb_queue_purge(pktchain);
705 706 707 708
	} while (!rc);
	return rc;
}

709 710 711 712 713 714
/**
 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
 * @arrvq: queue with arriving messages, to be cloned after destination lookup
 * @inputq: queue with cloned messages, delivered to socket after dest lookup
 *
 * Multi-threaded: parallel calls with reference to same queues may occur
715
 */
716 717
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
		       struct sk_buff_head *inputq)
718
{
719
	struct tipc_msg *msg;
720 721
	struct tipc_plist dports;
	u32 portid;
722
	u32 scope = TIPC_CLUSTER_SCOPE;
723 724 725
	struct sk_buff_head tmpq;
	uint hsz;
	struct sk_buff *skb, *_skb;
726

727
	__skb_queue_head_init(&tmpq);
728
	tipc_plist_init(&dports);
729

730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750
	skb = tipc_skb_peek(arrvq, &inputq->lock);
	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
		msg = buf_msg(skb);
		hsz = skb_headroom(skb) + msg_hdr_sz(msg);

		if (in_own_node(net, msg_orignode(msg)))
			scope = TIPC_NODE_SCOPE;

		/* Create destination port list and message clones: */
		tipc_nametbl_mc_translate(net,
					  msg_nametype(msg), msg_namelower(msg),
					  msg_nameupper(msg), scope, &dports);
		portid = tipc_plist_pop(&dports);
		for (; portid; portid = tipc_plist_pop(&dports)) {
			_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
			if (_skb) {
				msg_set_destport(buf_msg(_skb), portid);
				__skb_queue_tail(&tmpq, _skb);
				continue;
			}
			pr_warn("Failed to clone mcast rcv buffer\n");
751
		}
752 753 754 755 756 757 758 759 760
		/* Append to inputq if not already done by other thread */
		spin_lock_bh(&inputq->lock);
		if (skb_peek(arrvq) == skb) {
			skb_queue_splice_tail_init(&tmpq, inputq);
			kfree_skb(__skb_dequeue(arrvq));
		}
		spin_unlock_bh(&inputq->lock);
		__skb_queue_purge(&tmpq);
		kfree_skb(skb);
761
	}
762
	tipc_sk_rcv(net, inputq);
763 764
}

765 766 767
/**
 * tipc_sk_proto_rcv - receive a connection mng protocol message
 * @tsk: receiving socket
768
 * @skb: pointer to message buffer. Set to NULL if buffer is consumed.
769
 */
770
static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff **skb)
771
{
772
	struct tipc_msg *msg = buf_msg(*skb);
773
	int conn_cong;
774 775
	u32 dnode;
	u32 own_node = tsk_own_node(tsk);
776
	/* Ignore if connection cannot be validated: */
777
	if (!tsk_peer_msg(tsk, msg))
778 779
		goto exit;

780
	tsk->probing_state = TIPC_CONN_OK;
781 782

	if (msg_type(msg) == CONN_ACK) {
783
		conn_cong = tsk_conn_cong(tsk);
784 785
		tsk->sent_unacked -= msg_msgcnt(msg);
		if (conn_cong)
786
			tsk->sk.sk_write_space(&tsk->sk);
787
	} else if (msg_type(msg) == CONN_PROBE) {
788 789 790 791
		if (tipc_msg_reverse(own_node, *skb, &dnode, TIPC_OK)) {
			msg_set_type(msg, CONN_PROBE_REPLY);
			return;
		}
792 793 794
	}
	/* Do nothing if msg_type() == CONN_PROBE_REPLY */
exit:
795 796
	kfree_skb(*skb);
	*skb = NULL;
797 798
}

799 800 801
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
{
	struct sock *sk = sock->sk;
802
	struct tipc_sock *tsk = tipc_sk(sk);
803 804 805 806 807 808 809 810 811 812 813 814 815 816 817
	DEFINE_WAIT(wait);
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (sock->state == SS_DISCONNECTING)
			return -EPIPE;
		if (!*timeo_p)
			return -EAGAIN;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
818
		done = sk_wait_event(sk, timeo_p, !tsk->link_cong);
819 820 821 822 823
		finish_wait(sk_sleep(sk), &wait);
	} while (!done);
	return 0;
}

P
Per Liden 已提交
824
/**
825
 * tipc_sendmsg - send message in connectionless manner
P
Per Liden 已提交
826 827
 * @sock: socket structure
 * @m: message to send
828
 * @dsz: amount of user data to be sent
829
 *
P
Per Liden 已提交
830
 * Message must have an destination specified explicitly.
831
 * Used for SOCK_RDM and SOCK_DGRAM messages,
P
Per Liden 已提交
832 833
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
834
 *
P
Per Liden 已提交
835 836
 * Returns the number of bytes sent on success, or errno otherwise
 */
837
static int tipc_sendmsg(struct socket *sock,
838
			struct msghdr *m, size_t dsz)
839 840 841 842 843 844 845 846 847 848 849 850
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_sendmsg(sock, m, dsz);
	release_sock(sk);

	return ret;
}

static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
P
Per Liden 已提交
851
{
852
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
853
	struct sock *sk = sock->sk;
854
	struct tipc_sock *tsk = tipc_sk(sk);
855
	struct net *net = sock_net(sk);
856
	struct tipc_msg *mhdr = &tsk->phdr;
857
	u32 dnode, dport;
858
	struct sk_buff_head *pktchain = &sk->sk_write_queue;
859
	struct sk_buff *skb;
860
	struct tipc_name_seq *seq;
A
Al Viro 已提交
861
	struct iov_iter save;
862
	u32 mtu;
863
	long timeo;
E
Erik Hugne 已提交
864
	int rc;
P
Per Liden 已提交
865

866
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
867
		return -EMSGSIZE;
868 869 870 871 872 873 874 875 876
	if (unlikely(!dest)) {
		if (tsk->connected && sock->state == SS_READY)
			dest = &tsk->remote;
		else
			return -EDESTADDRREQ;
	} else if (unlikely(m->msg_namelen < sizeof(*dest)) ||
		   dest->family != AF_TIPC) {
		return -EINVAL;
	}
877
	if (unlikely(sock->state != SS_READY)) {
878 879 880 881 882 883
		if (sock->state == SS_LISTENING)
			return -EPIPE;
		if (sock->state != SS_UNCONNECTED)
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
884
		if (dest->addrtype == TIPC_ADDR_NAME) {
885 886
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
887
		}
P
Per Liden 已提交
888
	}
889
	seq = &dest->addr.nameseq;
890
	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
891 892

	if (dest->addrtype == TIPC_ADDR_MCAST) {
893
		return tipc_sendmcast(sock, seq, m, dsz, timeo);
894 895 896 897 898 899 900 901 902 903 904
	} else if (dest->addrtype == TIPC_ADDR_NAME) {
		u32 type = dest->addr.name.name.type;
		u32 inst = dest->addr.name.name.instance;
		u32 domain = dest->addr.name.domain;

		dnode = domain;
		msg_set_type(mhdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(mhdr, NAMED_H_SIZE);
		msg_set_nametype(mhdr, type);
		msg_set_nameinst(mhdr, inst);
		msg_set_lookup_scope(mhdr, tipc_addr_scope(domain));
905
		dport = tipc_nametbl_translate(net, type, inst, &dnode);
906 907
		msg_set_destnode(mhdr, dnode);
		msg_set_destport(mhdr, dport);
908 909
		if (unlikely(!dport && !dnode))
			return -EHOSTUNREACH;
910 911 912 913 914 915 916 917 918
	} else if (dest->addrtype == TIPC_ADDR_ID) {
		dnode = dest->addr.id.node;
		msg_set_type(mhdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(mhdr, 0);
		msg_set_destnode(mhdr, dnode);
		msg_set_destport(mhdr, dest->addr.id.ref);
		msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
	}

A
Al Viro 已提交
919
	save = m->msg_iter;
920
new_mtu:
921
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
922
	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
923
	if (rc < 0)
924
		return rc;
925 926

	do {
927
		skb = skb_peek(pktchain);
928
		TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
929
		rc = tipc_link_xmit(net, pktchain, dnode, tsk->portid);
930 931
		if (likely(rc >= 0)) {
			if (sock->state != SS_READY)
932
				sock->state = SS_CONNECTING;
933
			rc = dsz;
934
			break;
935
		}
A
Al Viro 已提交
936 937
		if (rc == -EMSGSIZE) {
			m->msg_iter = save;
938
			goto new_mtu;
A
Al Viro 已提交
939
		}
940
		if (rc != -ELINKCONG)
941
			break;
942
		tsk->link_cong = 1;
943
		rc = tipc_wait_for_sndmsg(sock, &timeo);
944
		if (rc)
945
			__skb_queue_purge(pktchain);
946 947 948
	} while (!rc);

	return rc;
P
Per Liden 已提交
949 950
}

951 952 953
static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
{
	struct sock *sk = sock->sk;
954
	struct tipc_sock *tsk = tipc_sk(sk);
955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972
	DEFINE_WAIT(wait);
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (sock->state == SS_DISCONNECTING)
			return -EPIPE;
		else if (sock->state != SS_CONNECTED)
			return -ENOTCONN;
		if (!*timeo_p)
			return -EAGAIN;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		done = sk_wait_event(sk, timeo_p,
973
				     (!tsk->link_cong &&
974 975
				      !tsk_conn_cong(tsk)) ||
				     !tsk->connected);
976 977 978 979 980
		finish_wait(sk_sleep(sk), &wait);
	} while (!done);
	return 0;
}

981
/**
982
 * tipc_send_stream - send stream-oriented data
P
Per Liden 已提交
983
 * @sock: socket structure
984 985
 * @m: data to send
 * @dsz: total length of data to be transmitted
986
 *
987
 * Used for SOCK_STREAM data.
988
 *
989 990
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
P
Per Liden 已提交
991
 */
992
static int tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
993 994 995 996 997 998 999 1000 1001 1002 1003 1004
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_send_stream(sock, m, dsz);
	release_sock(sk);

	return ret;
}

static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
P
Per Liden 已提交
1005
{
1006
	struct sock *sk = sock->sk;
1007
	struct net *net = sock_net(sk);
1008
	struct tipc_sock *tsk = tipc_sk(sk);
1009
	struct tipc_msg *mhdr = &tsk->phdr;
1010
	struct sk_buff_head *pktchain = &sk->sk_write_queue;
1011
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
1012
	u32 portid = tsk->portid;
1013
	int rc = -EINVAL;
1014
	long timeo;
1015 1016
	u32 dnode;
	uint mtu, send, sent = 0;
A
Al Viro 已提交
1017
	struct iov_iter save;
P
Per Liden 已提交
1018 1019

	/* Handle implied connection establishment */
1020
	if (unlikely(dest)) {
1021
		rc = __tipc_sendmsg(sock, m, dsz);
1022
		if (dsz && (dsz == rc))
1023
			tsk->sent_unacked = 1;
1024 1025 1026
		return rc;
	}
	if (dsz > (uint)INT_MAX)
1027 1028
		return -EMSGSIZE;

1029 1030
	if (unlikely(sock->state != SS_CONNECTED)) {
		if (sock->state == SS_DISCONNECTING)
1031
			return -EPIPE;
1032
		else
1033
			return -ENOTCONN;
1034
	}
1035

1036
	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
1037
	dnode = tsk_peer_node(tsk);
1038 1039

next:
A
Al Viro 已提交
1040
	save = m->msg_iter;
1041
	mtu = tsk->max_pkt;
1042
	send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
1043
	rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
1044
	if (unlikely(rc < 0))
1045
		return rc;
1046
	do {
1047
		if (likely(!tsk_conn_cong(tsk))) {
1048
			rc = tipc_link_xmit(net, pktchain, dnode, portid);
1049
			if (likely(!rc)) {
1050
				tsk->sent_unacked++;
1051 1052 1053 1054 1055 1056
				sent += send;
				if (sent == dsz)
					break;
				goto next;
			}
			if (rc == -EMSGSIZE) {
1057 1058
				tsk->max_pkt = tipc_node_get_mtu(net, dnode,
								 portid);
A
Al Viro 已提交
1059
				m->msg_iter = save;
1060 1061 1062 1063
				goto next;
			}
			if (rc != -ELINKCONG)
				break;
1064
			tsk->link_cong = 1;
1065 1066
		}
		rc = tipc_wait_for_sndpkt(sock, &timeo);
1067
		if (rc)
1068
			__skb_queue_purge(pktchain);
1069
	} while (!rc);
1070

1071
	return sent ? sent : rc;
P
Per Liden 已提交
1072 1073
}

1074
/**
1075
 * tipc_send_packet - send a connection-oriented message
P
Per Liden 已提交
1076
 * @sock: socket structure
1077 1078
 * @m: message to send
 * @dsz: length of data to be transmitted
1079
 *
1080
 * Used for SOCK_SEQPACKET messages.
1081
 *
1082
 * Returns the number of bytes sent on success, or errno otherwise
P
Per Liden 已提交
1083
 */
1084
static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
P
Per Liden 已提交
1085
{
1086 1087
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;
P
Per Liden 已提交
1088

1089
	return tipc_send_stream(sock, m, dsz);
P
Per Liden 已提交
1090 1091
}

1092
/* tipc_sk_finish_conn - complete the setup of a connection
P
Per Liden 已提交
1093
 */
1094
static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1095
				u32 peer_node)
P
Per Liden 已提交
1096
{
1097 1098
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
1099
	struct tipc_msg *msg = &tsk->phdr;
P
Per Liden 已提交
1100

1101 1102 1103 1104 1105
	msg_set_destnode(msg, peer_node);
	msg_set_destport(msg, peer_port);
	msg_set_type(msg, TIPC_CONN_MSG);
	msg_set_lookup_scope(msg, 0);
	msg_set_hdr_sz(msg, SHORT_H_SIZE);
1106

1107
	tsk->probing_intv = CONN_PROBING_INTERVAL;
1108 1109
	tsk->probing_state = TIPC_CONN_OK;
	tsk->connected = 1;
1110
	sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv);
1111 1112
	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
P
Per Liden 已提交
1113 1114 1115 1116 1117 1118
}

/**
 * set_orig_addr - capture sender's address for received message
 * @m: descriptor for message info
 * @msg: received message header
1119
 *
P
Per Liden 已提交
1120 1121
 * Note: Address is not captured if not requested by receiver.
 */
S
Sam Ravnborg 已提交
1122
static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
P
Per Liden 已提交
1123
{
1124
	DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);
P
Per Liden 已提交
1125

1126
	if (addr) {
P
Per Liden 已提交
1127 1128
		addr->family = AF_TIPC;
		addr->addrtype = TIPC_ADDR_ID;
1129
		memset(&addr->addr, 0, sizeof(addr->addr));
P
Per Liden 已提交
1130 1131
		addr->addr.id.ref = msg_origport(msg);
		addr->addr.id.node = msg_orignode(msg);
1132 1133
		addr->addr.name.domain = 0;	/* could leave uninitialized */
		addr->scope = 0;		/* could leave uninitialized */
P
Per Liden 已提交
1134 1135 1136 1137 1138
		m->msg_namelen = sizeof(struct sockaddr_tipc);
	}
}

/**
1139
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
P
Per Liden 已提交
1140 1141
 * @m: descriptor for message info
 * @msg: received message header
1142
 * @tsk: TIPC port associated with message
1143
 *
P
Per Liden 已提交
1144
 * Note: Ancillary data is not captured if not requested by receiver.
1145
 *
P
Per Liden 已提交
1146 1147
 * Returns 0 if successful, otherwise errno
 */
1148 1149
static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
				 struct tipc_sock *tsk)
P
Per Liden 已提交
1150 1151 1152 1153
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
1154
	int has_name;
P
Per Liden 已提交
1155 1156 1157 1158 1159 1160 1161 1162 1163 1164
	int res;

	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
1165 1166
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
P
Per Liden 已提交
1167
			return res;
1168 1169 1170 1171 1172 1173
		if (anc_data[1]) {
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
P
Per Liden 已提交
1174 1175 1176 1177 1178 1179
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
1180
		has_name = 1;
P
Per Liden 已提交
1181 1182 1183 1184 1185
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
1186
		has_name = 1;
P
Per Liden 已提交
1187 1188 1189 1190 1191
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
1192 1193 1194 1195
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
P
Per Liden 已提交
1196 1197
		break;
	default:
1198
		has_name = 0;
P
Per Liden 已提交
1199
	}
1200 1201 1202 1203 1204
	if (has_name) {
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}
P
Per Liden 已提交
1205 1206 1207 1208

	return 0;
}

1209
static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack)
1210
{
1211
	struct net *net = sock_net(&tsk->sk);
1212
	struct sk_buff *skb = NULL;
1213
	struct tipc_msg *msg;
1214 1215
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);
1216

1217
	if (!tsk->connected)
1218
		return;
1219 1220 1221
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
1222
	if (!skb)
1223
		return;
1224
	msg = buf_msg(skb);
1225
	msg_set_msgcnt(msg, ack);
1226
	tipc_link_xmit_skb(net, skb, dnode, msg_link_selector(msg));
1227 1228
}

1229
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
Y
Ying Xue 已提交
1230 1231 1232
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
1233
	long timeo = *timeop;
Y
Ying Xue 已提交
1234 1235 1236 1237
	int err;

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1238
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
Y
Ying Xue 已提交
1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252
			if (sock->state == SS_DISCONNECTING) {
				err = -ENOTCONN;
				break;
			}
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
1253 1254 1255
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
Y
Ying Xue 已提交
1256 1257
	}
	finish_wait(sk_sleep(sk), &wait);
1258
	*timeop = timeo;
Y
Ying Xue 已提交
1259 1260 1261
	return err;
}

1262
/**
1263
 * tipc_recvmsg - receive packet-oriented message
P
Per Liden 已提交
1264 1265 1266
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
1267
 *
P
Per Liden 已提交
1268 1269 1270 1271 1272
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
1273 1274
static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
			int flags)
P
Per Liden 已提交
1275
{
1276
	struct sock *sk = sock->sk;
1277
	struct tipc_sock *tsk = tipc_sk(sk);
P
Per Liden 已提交
1278 1279
	struct sk_buff *buf;
	struct tipc_msg *msg;
Y
Ying Xue 已提交
1280
	long timeo;
P
Per Liden 已提交
1281 1282 1283 1284
	unsigned int sz;
	u32 err;
	int res;

1285
	/* Catch invalid receive requests */
P
Per Liden 已提交
1286 1287 1288
	if (unlikely(!buf_len))
		return -EINVAL;

1289
	lock_sock(sk);
P
Per Liden 已提交
1290

1291 1292
	if (unlikely(sock->state == SS_UNCONNECTED)) {
		res = -ENOTCONN;
P
Per Liden 已提交
1293 1294 1295
		goto exit;
	}

Y
Ying Xue 已提交
1296
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1297
restart:
P
Per Liden 已提交
1298

1299
	/* Look for a message in receive queue; wait if necessary */
1300
	res = tipc_wait_for_rcvmsg(sock, &timeo);
Y
Ying Xue 已提交
1301 1302
	if (res)
		goto exit;
P
Per Liden 已提交
1303

1304 1305
	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
P
Per Liden 已提交
1306 1307 1308 1309 1310 1311
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
1312
		tsk_advance_rx_queue(sk);
P
Per Liden 已提交
1313 1314 1315 1316 1317 1318 1319
		goto restart;
	}

	/* Capture sender's address (optional) */
	set_orig_addr(m, msg);

	/* Capture ancillary data (optional) */
1320
	res = tipc_sk_anc_data_recv(m, msg, tsk);
1321
	if (res)
P
Per Liden 已提交
1322 1323 1324 1325 1326 1327 1328 1329
		goto exit;

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
		if (unlikely(buf_len < sz)) {
			sz = buf_len;
			m->msg_flags |= MSG_TRUNC;
		}
1330
		res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg), m, sz);
1331
		if (res)
P
Per Liden 已提交
1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343
			goto exit;
		res = sz;
	} else {
		if ((sock->state == SS_READY) ||
		    ((err == TIPC_CONN_SHUTDOWN) || m->msg_control))
			res = 0;
		else
			res = -ECONNRESET;
	}

	/* Consume received message (optional) */
	if (likely(!(flags & MSG_PEEK))) {
1344
		if ((sock->state != SS_READY) &&
1345
		    (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) {
1346
			tipc_sk_send_ack(tsk, tsk->rcv_unacked);
1347 1348
			tsk->rcv_unacked = 0;
		}
1349
		tsk_advance_rx_queue(sk);
1350
	}
P
Per Liden 已提交
1351
exit:
1352
	release_sock(sk);
P
Per Liden 已提交
1353 1354 1355
	return res;
}

1356
/**
1357
 * tipc_recv_stream - receive stream-oriented data
P
Per Liden 已提交
1358 1359 1360
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
1361 1362
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
P
Per Liden 已提交
1363 1364 1365 1366
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
1367 1368
static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
			    size_t buf_len, int flags)
P
Per Liden 已提交
1369
{
1370
	struct sock *sk = sock->sk;
1371
	struct tipc_sock *tsk = tipc_sk(sk);
P
Per Liden 已提交
1372 1373
	struct sk_buff *buf;
	struct tipc_msg *msg;
Y
Ying Xue 已提交
1374
	long timeo;
P
Per Liden 已提交
1375
	unsigned int sz;
1376
	int sz_to_copy, target, needed;
P
Per Liden 已提交
1377 1378
	int sz_copied = 0;
	u32 err;
1379
	int res = 0;
P
Per Liden 已提交
1380

1381
	/* Catch invalid receive attempts */
P
Per Liden 已提交
1382 1383 1384
	if (unlikely(!buf_len))
		return -EINVAL;

1385
	lock_sock(sk);
P
Per Liden 已提交
1386

Y
Ying Xue 已提交
1387
	if (unlikely(sock->state == SS_UNCONNECTED)) {
1388
		res = -ENOTCONN;
P
Per Liden 已提交
1389 1390 1391
		goto exit;
	}

1392
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
Y
Ying Xue 已提交
1393
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
P
Per Liden 已提交
1394

1395
restart:
1396
	/* Look for a message in receive queue; wait if necessary */
1397
	res = tipc_wait_for_rcvmsg(sock, &timeo);
Y
Ying Xue 已提交
1398 1399
	if (res)
		goto exit;
P
Per Liden 已提交
1400

1401 1402
	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
P
Per Liden 已提交
1403 1404 1405 1406 1407 1408
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
1409
		tsk_advance_rx_queue(sk);
P
Per Liden 已提交
1410 1411 1412 1413 1414 1415
		goto restart;
	}

	/* Optionally capture sender's address & ancillary data of first msg */
	if (sz_copied == 0) {
		set_orig_addr(m, msg);
1416
		res = tipc_sk_anc_data_recv(m, msg, tsk);
1417
		if (res)
P
Per Liden 已提交
1418 1419 1420 1421 1422
			goto exit;
	}

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
1423
		u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle);
P
Per Liden 已提交
1424

1425
		sz -= offset;
P
Per Liden 已提交
1426 1427
		needed = (buf_len - sz_copied);
		sz_to_copy = (sz <= needed) ? sz : needed;
1428

1429 1430
		res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg) + offset,
					    m, sz_to_copy);
1431
		if (res)
P
Per Liden 已提交
1432
			goto exit;
1433

P
Per Liden 已提交
1434 1435 1436 1437
		sz_copied += sz_to_copy;

		if (sz_to_copy < sz) {
			if (!(flags & MSG_PEEK))
1438 1439
				TIPC_SKB_CB(buf)->handle =
				(void *)(unsigned long)(offset + sz_to_copy);
P
Per Liden 已提交
1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453
			goto exit;
		}
	} else {
		if (sz_copied != 0)
			goto exit; /* can't add error msg to valid data */

		if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
			res = 0;
		else
			res = -ECONNRESET;
	}

	/* Consume received message (optional) */
	if (likely(!(flags & MSG_PEEK))) {
1454
		if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) {
1455
			tipc_sk_send_ack(tsk, tsk->rcv_unacked);
1456 1457
			tsk->rcv_unacked = 0;
		}
1458
		tsk_advance_rx_queue(sk);
1459
	}
P
Per Liden 已提交
1460 1461

	/* Loop around if more data is required */
1462 1463
	if ((sz_copied < buf_len) &&	/* didn't get all requested data */
	    (!skb_queue_empty(&sk->sk_receive_queue) ||
1464
	    (sz_copied < target)) &&	/* and more is ready or required */
1465 1466
	    (!(flags & MSG_PEEK)) &&	/* and aren't just peeking at data */
	    (!err))			/* and haven't reached a FIN */
P
Per Liden 已提交
1467 1468 1469
		goto restart;

exit:
1470
	release_sock(sk);
1471
	return sz_copied ? sz_copied : res;
P
Per Liden 已提交
1472 1473
}

1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494
/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 * @len: the length of messages
 */
1495
static void tipc_data_ready(struct sock *sk)
1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
						POLLRDNORM | POLLRDBAND);
	rcu_read_unlock();
}

1507 1508
/**
 * filter_connect - Handle all incoming messages for a connection-based socket
1509
 * @tsk: TIPC socket
1510
 * @skb: pointer to message buffer. Set to NULL if buffer is consumed
1511
 *
S
stephen hemminger 已提交
1512
 * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise
1513
 */
1514
static int filter_connect(struct tipc_sock *tsk, struct sk_buff **skb)
1515
{
1516
	struct sock *sk = &tsk->sk;
1517
	struct net *net = sock_net(sk);
1518
	struct socket *sock = sk->sk_socket;
1519
	struct tipc_msg *msg = buf_msg(*skb);
1520
	int retval = -TIPC_ERR_NO_PORT;
1521 1522 1523 1524 1525 1526 1527

	if (msg_mcast(msg))
		return retval;

	switch ((int)sock->state) {
	case SS_CONNECTED:
		/* Accept only connection-based messages sent by peer */
1528
		if (tsk_peer_msg(tsk, msg)) {
1529 1530
			if (unlikely(msg_errcode(msg))) {
				sock->state = SS_DISCONNECTING;
1531
				tsk->connected = 0;
1532
				/* let timer expire on it's own */
1533
				tipc_node_remove_conn(net, tsk_peer_node(tsk),
1534
						      tsk->portid);
1535 1536 1537 1538 1539 1540
			}
			retval = TIPC_OK;
		}
		break;
	case SS_CONNECTING:
		/* Accept only ACK or NACK message */
1541 1542 1543 1544

		if (unlikely(!msg_connected(msg)))
			break;

1545 1546
		if (unlikely(msg_errcode(msg))) {
			sock->state = SS_DISCONNECTING;
1547
			sk->sk_err = ECONNREFUSED;
1548 1549 1550 1551
			retval = TIPC_OK;
			break;
		}

1552
		if (unlikely(msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)) {
1553
			sock->state = SS_DISCONNECTING;
1554
			sk->sk_err = EINVAL;
1555
			retval = TIPC_OK;
1556 1557 1558
			break;
		}

1559 1560
		tipc_sk_finish_conn(tsk, msg_origport(msg), msg_orignode(msg));
		msg_set_importance(&tsk->phdr, msg_importance(msg));
1561 1562
		sock->state = SS_CONNECTED;

1563 1564 1565 1566 1567 1568
		/* If an incoming message is an 'ACK-', it should be
		 * discarded here because it doesn't contain useful
		 * data. In addition, we should try to wake up
		 * connect() routine if sleeping.
		 */
		if (msg_data_sz(msg) == 0) {
1569 1570
			kfree_skb(*skb);
			*skb = NULL;
1571 1572 1573 1574
			if (waitqueue_active(sk_sleep(sk)))
				wake_up_interruptible(sk_sleep(sk));
		}
		retval = TIPC_OK;
1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589
		break;
	case SS_LISTENING:
	case SS_UNCONNECTED:
		/* Accept only SYN message */
		if (!msg_connected(msg) && !(msg_errcode(msg)))
			retval = TIPC_OK;
		break;
	case SS_DISCONNECTING:
		break;
	default:
		pr_err("Unknown socket state %u\n", sock->state);
	}
	return retval;
}

1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600
/**
 * rcvbuf_limit - get proper overload limit of socket receive queue
 * @sk: socket
 * @buf: message
 *
 * For all connection oriented messages, irrespective of importance,
 * the default overload value (i.e. 67MB) is set as limit.
 *
 * For all connectionless messages, by default new queue limits are
 * as belows:
 *
1601 1602 1603 1604
 * TIPC_LOW_IMPORTANCE       (4 MB)
 * TIPC_MEDIUM_IMPORTANCE    (8 MB)
 * TIPC_HIGH_IMPORTANCE      (16 MB)
 * TIPC_CRITICAL_IMPORTANCE  (32 MB)
1605 1606 1607 1608 1609 1610 1611 1612
 *
 * Returns overload limit according to corresponding message importance
 */
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
{
	struct tipc_msg *msg = buf_msg(buf);

	if (msg_connected(msg))
1613 1614 1615 1616
		return sysctl_tipc_rmem[2];

	return sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE <<
		msg_importance(msg);
1617 1618
}

/**
 * filter_rcv - validate incoming message
 * @sk: socket
 * @skb: pointer to message. Set to NULL if buffer is consumed.
 *
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
 * Called with socket lock already taken
 *
 * Returns 0 (TIPC_OK) if message was ok, -TIPC error code if rejected
 */
static int filter_rcv(struct sock *sk, struct sk_buff **skb)
{
	struct socket *sock = sk->sk_socket;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *msg = buf_msg(*skb);
	unsigned int limit = rcvbuf_limit(sk, *skb);
	int rc = TIPC_OK;

	if (unlikely(msg_user(msg) == CONN_MANAGER)) {
		tipc_sk_proto_rcv(tsk, skb);
		return TIPC_OK;
	}

	if (unlikely(msg_user(msg) == SOCK_WAKEUP)) {
		kfree_skb(*skb);
		tsk->link_cong = 0;
		sk->sk_write_space(sk);
		*skb = NULL;
		return TIPC_OK;
	}

	/* Reject message if it is wrong sort of message for socket */
	if (msg_type(msg) > TIPC_DIRECT_MSG)
		return -TIPC_ERR_NO_PORT;

	if (sock->state == SS_READY) {
		if (msg_connected(msg))
			return -TIPC_ERR_NO_PORT;
	} else {
		rc = filter_connect(tsk, skb);
		if (rc != TIPC_OK || !*skb)
			return rc;
	}

	/* Reject message if there isn't room to queue it */
	if (sk_rmem_alloc_get(sk) + (*skb)->truesize >= limit)
		return -TIPC_ERR_OVERLOAD;

	/* Enqueue message */
	TIPC_SKB_CB(*skb)->handle = NULL;
	__skb_queue_tail(&sk->sk_receive_queue, *skb);
	skb_set_owner_r(*skb, sk);

	sk->sk_data_ready(sk);
	*skb = NULL;
	return TIPC_OK;
}

/**
 * tipc_backlog_rcv - handle incoming message from backlog queue
 * @sk: socket
 * @skb: message
 *
 * Caller must hold socket lock
 *
 * Returns 0
 */
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int err;
	atomic_t *dcnt;
	u32 dnode;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	uint truesize = skb->truesize;

	err = filter_rcv(sk, &skb);
	if (likely(!skb)) {
		dcnt = &tsk->dupl_rcvcnt;
		if (atomic_read(dcnt) < TIPC_CONN_OVERLOAD_LIMIT)
			atomic_add(truesize, dcnt);
		return 0;
	}
	if (!err || tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, -err))
		tipc_link_xmit_skb(net, skb, dnode, tsk->portid);
	return 0;
}

/**
 * tipc_sk_enqueue - extract all buffers with destination 'dport' from
 *                   inputq and try adding them to socket or backlog queue
 * @inputq: list of incoming buffers with potentially different destinations
 * @sk: socket where the buffers should be enqueued
 * @dport: port number for the socket
 * @_skb: returned buffer to be forwarded or rejected, if applicable
 *
 * Caller must hold socket lock
 *
 * Returns TIPC_OK if all buffers enqueued, otherwise -TIPC_ERR_OVERLOAD
 * or -TIPC_ERR_NO_PORT
 */
static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
			   u32 dport, struct sk_buff **_skb)
{
	unsigned int lim;
	atomic_t *dcnt;
	int err;
	struct sk_buff *skb;
	unsigned long time_limit = jiffies + 2;

	while (skb_queue_len(inputq)) {
		if (unlikely(time_after_eq(jiffies, time_limit)))
			return TIPC_OK;
		skb = tipc_skb_dequeue(inputq, dport);
		if (unlikely(!skb))
			return TIPC_OK;
		if (!sock_owned_by_user(sk)) {
			err = filter_rcv(sk, &skb);
			if (likely(!skb))
				continue;
			*_skb = skb;
			return err;
		}
		dcnt = &tipc_sk(sk)->dupl_rcvcnt;
		if (sk->sk_backlog.len)
			atomic_set(dcnt, 0);
		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
		if (likely(!sk_add_backlog(sk, skb, lim)))
			continue;
		*_skb = skb;
		return -TIPC_ERR_OVERLOAD;
	}
	return TIPC_OK;
}

/**
 * tipc_sk_rcv - handle a chain of incoming buffers
 * @inputq: buffer list containing the buffers
 * Consumes all buffers in list until inputq is empty
 * Note: may be called in multiple threads referring to the same queue
 * Returns 0 if last buffer was accepted, otherwise -EHOSTUNREACH
 * Only node-local callers check the return value, and they send
 * single-buffer queues
 */
int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
{
	u32 dnode, dport = 0;
	int err;
	struct sk_buff *skb;
	struct tipc_sock *tsk;
	struct tipc_net *tn;
	struct sock *sk;

	while (skb_queue_len(inputq)) {
		err = -TIPC_ERR_NO_PORT;
		skb = NULL;
		dport = tipc_skb_peek_port(inputq, dport);
		tsk = tipc_sk_lookup(net, dport);
		if (likely(tsk)) {
			sk = &tsk->sk;
			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
				err = tipc_sk_enqueue(inputq, sk, dport, &skb);
				spin_unlock_bh(&sk->sk_lock.slock);
				dport = 0;
			}
			sock_put(sk);
		} else {
			skb = tipc_skb_dequeue(inputq, dport);
		}
		if (likely(!skb))
			continue;
		if (tipc_msg_lookup_dest(net, skb, &dnode, &err))
			goto xmit;
		if (!err) {
			dnode = msg_destnode(buf_msg(skb));
			goto xmit;
		}
		tn = net_generic(net, tipc_net_id);
		if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err))
			continue;
xmit:
		tipc_link_xmit_skb(net, skb, dnode, dport);
	}
	return err ? -EHOSTUNREACH : 0;
}

static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (!*timeo_p)
			return -ETIMEDOUT;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		done = sk_wait_event(sk, timeo_p, sock->state != SS_CONNECTING);
		finish_wait(sk_sleep(sk), &wait);
	} while (!done);
	return 0;
}

/**
 * tipc_connect - establish a connection to another TIPC port
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
 * @flags: file-related flags associated with socket
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
			int destlen, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
	long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
	socket_state previous;
	int res = 0;

	lock_sock(sk);

	/* DGRAM/RDM connect(), just save the destaddr */
	if (sock->state == SS_READY) {
		if (dst->family == AF_UNSPEC) {
			memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc));
			tsk->connected = 0;
		} else if (destlen != sizeof(struct sockaddr_tipc)) {
			res = -EINVAL;
		} else {
			memcpy(&tsk->remote, dest, destlen);
			tsk->connected = 1;
		}
		goto exit;
	}

	/*
	 * Reject connection attempt using multicast address
	 *
	 * Note: __tipc_sendmsg() validates the rest of the address fields,
	 *       so there's no need to do it here
	 */
	if (dst->addrtype == TIPC_ADDR_MCAST) {
		res = -EINVAL;
		goto exit;
	}

	previous = sock->state;
	switch (sock->state) {
	case SS_UNCONNECTED:
		/* Send a 'SYN-' to destination */
		m.msg_name = dest;
		m.msg_namelen = destlen;

		/* If connect() is non-blocking, set MSG_DONTWAIT so that
		 * __tipc_sendmsg() never blocks.
		 */
		if (!timeout)
			m.msg_flags = MSG_DONTWAIT;

		res = __tipc_sendmsg(sock, &m, 0);
		if ((res < 0) && (res != -EWOULDBLOCK))
			goto exit;

		/* Just entered SS_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		res = -EINPROGRESS;
		/* fall through */
	case SS_CONNECTING:
		if (previous == SS_CONNECTING)
			res = -EALREADY;
		if (!timeout)
			goto exit;
		timeout = msecs_to_jiffies(timeout);
		/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
		res = tipc_wait_for_connect(sock, &timeout);
		break;
	case SS_CONNECTED:
		res = -EISCONN;
		break;
	default:
		res = -EINVAL;
		break;
	}
exit:
	release_sock(sk);
	return res;
}
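
/*
 * A minimal user-space sketch of the connect path above for a SOCK_STREAM
 * socket, addressing the peer by service name (illustrative only; the
 * service type/instance values are arbitrary examples):
 */
#if 0
#include <sys/socket.h>
#include <linux/tipc.h>

static int tipc_connect_example(void)
{
	struct sockaddr_tipc server = {
		.family                  = AF_TIPC,
		.addrtype                = TIPC_ADDR_NAME,
		.addr.name.name.type     = 18888,	/* arbitrary service type */
		.addr.name.name.instance = 17,		/* arbitrary instance */
		.addr.name.domain        = 0,		/* zone-wide lookup */
	};
	int sd = socket(AF_TIPC, SOCK_STREAM, 0);

	if (sd < 0)
		return -1;
	/* Ends up in tipc_connect(): sends 'SYN-', then waits for 'ACK'/'RST' */
	if (connect(sd, (struct sockaddr *)&server, sizeof(server)) < 0)
		return -1;
	return sd;
}
#endif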

/**
 * tipc_listen - allow socket to listen for incoming connections
 * @sock: socket structure
 * @len: (unused)
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_listen(struct socket *sock, int len)
{
	struct sock *sk = sock->sk;
	int res;

	lock_sock(sk);

	if (sock->state != SS_UNCONNECTED)
		res = -EINVAL;
	else {
		sock->state = SS_LISTENING;
		res = 0;
	}

	release_sock(sk);
	return res;
}

static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int err;

	/* True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	 */
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EINVAL;
		if (sock->state != SS_LISTENING)
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

/**
 * tipc_accept - wait for connection request
 * @sock: listening socket
 * @new_sock: new socket that is to be connected
 * @flags: file-related flags associated with socket
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
{
	struct sock *new_sk, *sk = sock->sk;
	struct sk_buff *buf;
	struct tipc_sock *new_tsock;
	struct tipc_msg *msg;
	long timeo;
	int res;

	lock_sock(sk);

	if (sock->state != SS_LISTENING) {
		res = -EINVAL;
		goto exit;
	}
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	res = tipc_wait_for_accept(sock, timeo);
	if (res)
		goto exit;

	buf = skb_peek(&sk->sk_receive_queue);

	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
	if (res)
		goto exit;

	new_sk = new_sock->sk;
	new_tsock = tipc_sk(new_sk);
	msg = buf_msg(buf);

	/* we lock on new_sk; but lockdep sees the lock on sk */
	lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);

	/*
	 * Reject any stray messages received by new socket
	 * before the socket lock was taken (very, very unlikely)
	 */
	tsk_rej_rx_queue(new_sk);

	/* Connect new socket to its peer */
	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
	new_sock->state = SS_CONNECTED;

	tsk_set_importance(new_tsock, msg_importance(msg));
	if (msg_named(msg)) {
		new_tsock->conn_type = msg_nametype(msg);
		new_tsock->conn_instance = msg_nameinst(msg);
	}

	/*
	 * Respond to 'SYN-' by discarding it & returning 'ACK-'.
	 * Respond to 'SYN+' by queuing it on new socket.
	 */
	if (!msg_data_sz(msg)) {
		struct msghdr m = {NULL,};

		tsk_advance_rx_queue(sk);
		__tipc_send_stream(new_sock, &m, 0);
	} else {
		__skb_dequeue(&sk->sk_receive_queue);
		__skb_queue_head(&new_sk->sk_receive_queue, buf);
		skb_set_owner_r(buf, new_sk);
	}
	release_sock(new_sk);
exit:
	release_sock(sk);
	return res;
}
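
/*
 * A minimal user-space counterpart of the listen/accept path above
 * (illustrative only; the published name sequence is an arbitrary example):
 */
#if 0
#include <sys/socket.h>
#include <linux/tipc.h>

static int tipc_server_example(void)
{
	struct sockaddr_tipc srv = {
		.family             = AF_TIPC,
		.addrtype           = TIPC_ADDR_NAMESEQ,
		.scope              = TIPC_ZONE_SCOPE,
		.addr.nameseq.type  = 18888,	/* arbitrary service type */
		.addr.nameseq.lower = 17,
		.addr.nameseq.upper = 17,
	};
	int lsd = socket(AF_TIPC, SOCK_STREAM, 0);

	if (lsd < 0 || bind(lsd, (struct sockaddr *)&srv, sizeof(srv)) < 0)
		return -1;
	if (listen(lsd, 0) < 0)			/* tipc_listen(): -> SS_LISTENING */
		return -1;
	return accept(lsd, NULL, NULL);		/* blocks in tipc_wait_for_accept() */
}
#endif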

/**
 * tipc_shutdown - shutdown socket connection
 * @sock: socket structure
 * @how: direction to close (must be SHUT_RDWR)
 *
 * Terminates connection (if necessary), then purges socket's receive queue.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *skb;
	u32 dnode;
	int res;

	if (how != SHUT_RDWR)
		return -EINVAL;

	lock_sock(sk);

	switch (sock->state) {
	case SS_CONNECTING:
	case SS_CONNECTED:

restart:
		/* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
		skb = __skb_dequeue(&sk->sk_receive_queue);
		if (skb) {
			if (TIPC_SKB_CB(skb)->handle != NULL) {
				kfree_skb(skb);
				goto restart;
			}
			if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode,
					     TIPC_CONN_SHUTDOWN))
				tipc_link_xmit_skb(net, skb, dnode,
						   tsk->portid);
		} else {
			dnode = tsk_peer_node(tsk);

			skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
					      TIPC_CONN_MSG, SHORT_H_SIZE,
					      0, dnode, tsk_own_node(tsk),
					      tsk_peer_port(tsk),
					      tsk->portid, TIPC_CONN_SHUTDOWN);
			tipc_link_xmit_skb(net, skb, dnode, tsk->portid);
		}
		tsk->connected = 0;
		sock->state = SS_DISCONNECTING;
		tipc_node_remove_conn(net, dnode, tsk->portid);
		/* fall through */

	case SS_DISCONNECTING:

		/* Discard any unreceived messages */
		__skb_queue_purge(&sk->sk_receive_queue);

		/* Wake up anyone sleeping in poll */
		sk->sk_state_change(sk);
		res = 0;
		break;

	default:
		res = -ENOTCONN;
	}

	release_sock(sk);
	return res;
}
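
/*
 * User-space sketch: TIPC supports only a full shutdown, so both directions
 * must be closed at once (illustrative only):
 */
#if 0
#include <sys/socket.h>

static void tipc_shutdown_example(int sd)
{
	shutdown(sd, SHUT_RDWR);	/* SHUT_RD or SHUT_WR alone gives -EINVAL */
}
#endif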

static void tipc_sk_timeout(unsigned long data)
{
	struct tipc_sock *tsk = (struct tipc_sock *)data;
	struct sock *sk = &tsk->sk;
	struct sk_buff *skb = NULL;
	u32 peer_port, peer_node;
	u32 own_node = tsk_own_node(tsk);

	bh_lock_sock(sk);
	if (!tsk->connected) {
		bh_unlock_sock(sk);
		goto exit;
	}
	peer_port = tsk_peer_port(tsk);
	peer_node = tsk_peer_node(tsk);

	if (tsk->probing_state == TIPC_CONN_PROBING) {
		/* Previous probe not answered -> self abort */
		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
				      TIPC_CONN_MSG, SHORT_H_SIZE, 0,
				      own_node, peer_node, tsk->portid,
				      peer_port, TIPC_ERR_NO_PORT);
	} else {
		skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
				      INT_H_SIZE, 0, peer_node, own_node,
				      peer_port, tsk->portid, TIPC_OK);
		tsk->probing_state = TIPC_CONN_PROBING;
		sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv);
	}
	bh_unlock_sock(sk);
	if (skb)
		tipc_link_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
exit:
	sock_put(sk);
}

static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq const *seq)
{
	struct net *net = sock_net(&tsk->sk);
	struct publication *publ;
	u32 key;

	if (tsk->connected)
		return -EINVAL;
	key = tsk->portid + tsk->pub_count + 1;
	if (key == tsk->portid)
		return -EADDRINUSE;

	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
				    scope, tsk->portid, key);
	if (unlikely(!publ))
		return -EINVAL;

	list_add(&publ->pport_list, &tsk->publications);
	tsk->pub_count++;
	tsk->published = 1;
	return 0;
}

static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq const *seq)
{
	struct net *net = sock_net(&tsk->sk);
	struct publication *publ;
	struct publication *safe;
	int rc = -EINVAL;

	list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
		if (seq) {
			if (publ->scope != scope)
				continue;
			if (publ->type != seq->type)
				continue;
			if (publ->lower != seq->lower)
				continue;
			if (publ->upper != seq->upper)
				break;
			tipc_nametbl_withdraw(net, publ->type, publ->lower,
					      publ->ref, publ->key);
			rc = 0;
			break;
		}
		tipc_nametbl_withdraw(net, publ->type, publ->lower,
				      publ->ref, publ->key);
		rc = 0;
	}
	if (list_empty(&tsk->publications))
		tsk->published = 0;
	return rc;
}
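
/*
 * User-space sketch of publication and withdrawal, which reach
 * tipc_sk_publish()/tipc_sk_withdraw() above through bind(); by TIPC's
 * bind() convention a positive scope publishes the name sequence and a
 * negative scope withdraws it again (illustrative only; the name range
 * is an arbitrary example):
 */
#if 0
#include <sys/socket.h>
#include <linux/tipc.h>

static int tipc_publish_example(int sd)
{
	struct sockaddr_tipc name = {
		.family             = AF_TIPC,
		.addrtype           = TIPC_ADDR_NAMESEQ,
		.scope              = TIPC_CLUSTER_SCOPE,
		.addr.nameseq.type  = 18888,	/* arbitrary service type */
		.addr.nameseq.lower = 0,
		.addr.nameseq.upper = 99,
	};

	/* Publish {18888, 0, 99} with cluster scope */
	if (bind(sd, (struct sockaddr *)&name, sizeof(name)) < 0)
		return -1;

	/* Withdraw the same range again */
	name.scope = -TIPC_CLUSTER_SCOPE;
	return bind(sd, (struct sockaddr *)&name, sizeof(name));
}
#endif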

/* tipc_sk_reinit: set non-zero address in all existing sockets
 *                 when we go from standalone to network mode.
 */
void tipc_sk_reinit(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	const struct bucket_table *tbl;
	struct rhash_head *pos;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;
	int i;

	rcu_read_lock();
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
	for (i = 0; i < tbl->size; i++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, i, node) {
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			msg = &tsk->phdr;
			msg_set_prevnode(msg, tn->own_addr);
			msg_set_orignode(msg, tn->own_addr);
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
	}
	rcu_read_unlock();
}

static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_sock *tsk;

	rcu_read_lock();
	tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params);
	if (tsk)
		sock_hold(&tsk->sk);
	rcu_read_unlock();

	return tsk;
}

static int tipc_sk_insert(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
	u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;

	while (remaining--) {
		portid++;
		if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT))
			portid = TIPC_MIN_PORT;
		tsk->portid = portid;
		sock_hold(&tsk->sk);
		if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
						   tsk_rht_params))
			return 0;
		sock_put(&tsk->sk);
	}

	return -1;
}

static void tipc_sk_remove(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);

	if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
}

static const struct rhashtable_params tsk_rht_params = {
	.nelem_hint = 192,
	.head_offset = offsetof(struct tipc_sock, node),
	.key_offset = offsetof(struct tipc_sock, portid),
	.key_len = sizeof(u32), /* portid */
	.max_size = 1048576,
	.min_size = 256,
	.automatic_shrinking = true,
};

int tipc_sk_rht_init(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
}

void tipc_sk_rht_destroy(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	/* Wait for socket readers to complete */
	synchronize_net();

	rhashtable_destroy(&tn->sk_rht);
}

/**
 * tipc_setsockopt - set socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: pointer to new option value
 * @ol: length of option value
 *
 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, unsigned int ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	u32 value;
	int res;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return 0;
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
	if (ol < sizeof(value))
		return -EINVAL;
	res = get_user(value, (u32 __user *)ov);
	if (res)
		return res;

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		res = tsk_set_importance(tsk, value);
		break;
	case TIPC_SRC_DROPPABLE:
		if (sock->type != SOCK_STREAM)
			tsk_set_unreliable(tsk, value);
		else
			res = -ENOPROTOOPT;
		break;
	case TIPC_DEST_DROPPABLE:
		tsk_set_unreturnable(tsk, value);
		break;
	case TIPC_CONN_TIMEOUT:
		tipc_sk(sk)->conn_timeout = value;
		/* no need to set "res", since already 0 at this point */
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	return res;
}
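
/*
 * User-space sketch of the TIPC-level options handled above (illustrative
 * only; the values are arbitrary examples):
 */
#if 0
#include <sys/socket.h>
#include <linux/tipc.h>

static int tipc_sockopt_example(int sd)
{
	__u32 importance = TIPC_CRITICAL_IMPORTANCE;
	__u32 conn_timeout = 10000;		/* ms, used by a later connect() */

	if (setsockopt(sd, SOL_TIPC, TIPC_IMPORTANCE,
		       &importance, sizeof(importance)) < 0)
		return -1;
	return setsockopt(sd, SOL_TIPC, TIPC_CONN_TIMEOUT,
			  &conn_timeout, sizeof(conn_timeout));
}
#endif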

/**
 * tipc_getsockopt - get socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: receptacle for option value
 * @ol: receptacle for length of option value
 *
 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, int __user *ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	int len;
	u32 value;
	int res;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return put_user(0, ol);
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
	res = get_user(len, ol);
	if (res)
		return res;

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		value = tsk_importance(tsk);
		break;
	case TIPC_SRC_DROPPABLE:
		value = tsk_unreliable(tsk);
		break;
	case TIPC_DEST_DROPPABLE:
		value = tsk_unreturnable(tsk);
		break;
	case TIPC_CONN_TIMEOUT:
		value = tsk->conn_timeout;
		/* no need to set "res", since already 0 at this point */
		break;
	case TIPC_NODE_RECVQ_DEPTH:
		value = 0; /* was tipc_queue_size, now obsolete */
		break;
	case TIPC_SOCK_RECVQ_DEPTH:
		value = skb_queue_len(&sk->sk_receive_queue);
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	if (res)
		return res;	/* "get" failed */

	if (len < sizeof(value))
		return -EINVAL;

	if (copy_to_user(ov, &value, sizeof(value)))
		return -EFAULT;

	return put_user(sizeof(value), ol);
}

static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	struct tipc_sioc_ln_req lnr;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case SIOCGETLINKNAME:
		if (copy_from_user(&lnr, argp, sizeof(lnr)))
			return -EFAULT;
		if (!tipc_node_get_linkname(sock_net(sk),
					    lnr.bearer_id & 0xffff, lnr.peer,
					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
			if (copy_to_user(argp, &lnr, sizeof(lnr)))
				return -EFAULT;
			return 0;
		}
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
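
/*
 * User-space sketch of the SIOCGETLINKNAME ioctl handled above: it resolves
 * the name of the link towards a given peer node on a given bearer
 * (illustrative only; the peer address <1.1.2> is an arbitrary example):
 */
#if 0
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/sockios.h>
#include <linux/tipc.h>

static int tipc_linkname_example(int sd)
{
	struct tipc_sioc_ln_req req = {
		.peer      = tipc_addr(1, 1, 2),
		.bearer_id = 0,
	};

	if (ioctl(sd, SIOCGETLINKNAME, &req) < 0)
		return -1;
	printf("link: %s\n", req.linkname);
	return 0;
}
#endif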

/* Protocol switches for the various types of TIPC sockets */

static const struct proto_ops msg_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendmsg,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

static const struct proto_ops packet_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_packet,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

static const struct proto_ops stream_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_stream,
	.recvmsg	= tipc_recv_stream,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

static const struct net_proto_family tipc_family_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.create		= tipc_sk_create
};

static struct proto tipc_proto = {
	.name		= "TIPC",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct tipc_sock),
	.sysctl_rmem	= sysctl_tipc_rmem
};

/**
 * tipc_socket_init - initialize TIPC socket interface
 *
 * Returns 0 on success, errno otherwise
 */
int tipc_socket_init(void)
{
	int res;

	res = proto_register(&tipc_proto, 1);
	if (res) {
		pr_err("Failed to register TIPC protocol type\n");
		goto out;
	}

	res = sock_register(&tipc_family_ops);
	if (res) {
		pr_err("Failed to register TIPC socket type\n");
		proto_unregister(&tipc_proto);
		goto out;
	}
 out:
	return res;
}

/**
 * tipc_socket_stop - stop TIPC socket interface
 */
void tipc_socket_stop(void)
{
	sock_unregister(tipc_family_ops.family);
	proto_unregister(&tipc_proto);
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
{
	u32 peer_node;
	u32 peer_port;
	struct nlattr *nest;

	peer_node = tsk_peer_node(tsk);
	peer_port = tsk_peer_port(tsk);

	nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);

	if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
		goto msg_full;
	if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
		goto msg_full;

	if (tsk->conn_type != 0) {
		if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
			goto msg_full;
	}
	nla_nest_end(skb, nest);

	return 0;

msg_full:
	nla_nest_cancel(skb, nest);

	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tipc_sock *tsk)
{
	int err;
	void *hdr;
	struct nlattr *attrs;
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto genlmsg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
		goto attr_msg_cancel;

	if (tsk->connected) {
		err = __tipc_nl_add_sk_con(skb, tsk);
		if (err)
			goto attr_msg_cancel;
	} else if (!list_empty(&tsk->publications)) {
		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
			goto attr_msg_cancel;
	}
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	struct tipc_sock *tsk;
	const struct bucket_table *tbl;
	struct rhash_head *pos;
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 tbl_id = cb->args[0];
	u32 prev_portid = cb->args[1];

	rcu_read_lock();
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
	for (; tbl_id < tbl->size; tbl_id++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) {
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			if (prev_portid && prev_portid != tsk->portid) {
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				continue;
			}

			err = __tipc_nl_add_sk(skb, cb, tsk);
			if (err) {
				prev_portid = tsk->portid;
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				goto out;
			}
			prev_portid = 0;
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
	}
out:
	rcu_read_unlock();
	cb->args[0] = tbl_id;
	cb->args[1] = prev_portid;

	return skb->len;
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 struct publication *publ)
{
	void *hdr;
	struct nlattr *attrs;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET);
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_PUBL);
	if (!attrs)
		goto genlmsg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
				  struct netlink_callback *cb,
				  struct tipc_sock *tsk, u32 *last_publ)
{
	int err;
	struct publication *p;

	if (*last_publ) {
		list_for_each_entry(p, &tsk->publications, pport_list) {
			if (p->key == *last_publ)
				break;
		}
		if (p->key != *last_publ) {
			/* We never set seq or call nl_dump_check_consistent()
			 * this means that setting prev_seq here will cause the
			 * consistence check to fail in the netlink callback
			 * handler. Resulting in the last NLMSG_DONE message
			 * having the NLM_F_DUMP_INTR flag set.
			 */
			cb->prev_seq = 1;
			*last_publ = 0;
			return -EPIPE;
		}
	} else {
		p = list_first_entry(&tsk->publications, struct publication,
				     pport_list);
	}

	list_for_each_entry_from(p, &tsk->publications, pport_list) {
		err = __tipc_nl_add_sk_publ(skb, cb, p);
		if (err) {
			*last_publ = p->key;
			return err;
		}
	}
	*last_publ = 0;

	return 0;
}

int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	u32 tsk_portid = cb->args[0];
	u32 last_publ = cb->args[1];
	u32 done = cb->args[2];
	struct net *net = sock_net(skb->sk);
	struct tipc_sock *tsk;

	if (!tsk_portid) {
		struct nlattr **attrs;
		struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];

		err = tipc_nlmsg_parse(cb->nlh, &attrs);
		if (err)
			return err;

		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
				       attrs[TIPC_NLA_SOCK],
				       tipc_nl_sock_policy);
		if (err)
			return err;

		if (!sock[TIPC_NLA_SOCK_REF])
			return -EINVAL;

		tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]);
	}

	if (done)
		return 0;

	tsk = tipc_sk_lookup(net, tsk_portid);
	if (!tsk)
		return -EINVAL;

	lock_sock(&tsk->sk);
	err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ);
	if (!err)
		done = 1;
	release_sock(&tsk->sk);
	sock_put(&tsk->sk);

	cb->args[0] = tsk_portid;
	cb->args[1] = last_publ;
	cb->args[2] = done;

	return skb->len;
}