socket.c 71.9 KB
Newer Older
P
Per Liden 已提交
1
/*
2
 * net/tipc/socket.c: TIPC socket API
3
 *
4
 * Copyright (c) 2001-2007, 2012-2016, Ericsson AB
5
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
P
Per Liden 已提交
6 7
 * All rights reserved.
 *
P
Per Liden 已提交
8
 * Redistribution and use in source and binary forms, with or without
P
Per Liden 已提交
9 10
 * modification, are permitted provided that the following conditions are met:
 *
P
Per Liden 已提交
11 12 13 14 15 16 17 18
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
P
Per Liden 已提交
19
 *
P
Per Liden 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
P
Per Liden 已提交
34 35 36
 * POSSIBILITY OF SUCH DAMAGE.
 */

37
#include <linux/rhashtable.h>
P
Per Liden 已提交
38
#include "core.h"
39
#include "name_table.h"
E
Erik Hugne 已提交
40
#include "node.h"
41
#include "link.h"
42
#include "name_distr.h"
43
#include "socket.h"
44
#include "bcast.h"
45
#include "netlink.h"
46

47
#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
48
#define CONN_PROBING_INTERVAL	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
49 50 51
#define TIPC_FWD_MSG		1
#define TIPC_MAX_PORT		0xffffffff
#define TIPC_MIN_PORT		1
52

53 54
/* TIPC socket states, mapped onto the corresponding TCP state values so
 * that generic socket infrastructure reports meaningful states.
 */
enum {
	TIPC_LISTEN = TCP_LISTEN,
	TIPC_ESTABLISHED = TCP_ESTABLISHED,
	TIPC_OPEN = TCP_CLOSE,
	TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
	TIPC_CONNECTING = TCP_SYN_SENT,
};

61 62 63 64 65 66 67
/**
 * struct tipc_sock - TIPC socket structure
 * @sk: socket - interacts with 'port' and with user via the socket API
 * @conn_type: TIPC type used when connection was established
 * @conn_instance: TIPC instance used when connection was established
 * @published: non-zero if port has one or more associated names
 * @max_pkt: maximum packet size "hint" used when building messages sent by port
 * @portid: unique port identity in TIPC socket hash table
 * @phdr: preformatted message header used when sending messages
 * @cong_links: list of congested links
 * @publications: list of publications for port
 * @pub_count: total # of publications port has made during its lifetime
 * @conn_timeout: the time we can wait for an unresponded setup request
 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
 * @probe_unacked: a connection probe has been sent but not yet acknowledged
 * @cong_link_cnt: number of congested links
 * @snt_unacked: # messages sent by socket, and not yet acked by peer
 * @snd_win: send window, as advertised by the peer
 * @peer_caps: capability flags received from the connected peer
 * @rcv_unacked: # messages read by user, but not yet acked back to peer
 * @rcv_win: receive window advertised to the peer
 * @peer: 'connected' peer for dgram/rdm
 * @node: hash table node
 * @mc_method: cookie for use between socket and broadcast layer
 * @rcu: rcu struct for tipc_sock
 */
struct tipc_sock {
	struct sock sk;
	u32 conn_type;
	u32 conn_instance;
	int published;
	u32 max_pkt;
	u32 portid;
	struct tipc_msg phdr;
	struct list_head cong_links;
	struct list_head publications;
	u32 pub_count;
	uint conn_timeout;
	atomic_t dupl_rcvcnt;
	bool probe_unacked;
	u16 cong_link_cnt;
	u16 snt_unacked;
	u16 snd_win;
	u16 peer_caps;
	u16 rcv_unacked;
	u16 rcv_win;
	struct sockaddr_tipc peer;
	struct rhash_head node;
	struct tipc_mc_method mc_method;
	struct rcu_head rcu;
};
P
Per Liden 已提交
110

111
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
112
static void tipc_data_ready(struct sock *sk);
113
static void tipc_write_space(struct sock *sk);
114
static void tipc_sock_destruct(struct sock *sk);
115 116
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
117
static void tipc_sk_timeout(unsigned long data);
118
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
119
			   struct tipc_name_seq const *seq);
120
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
121
			    struct tipc_name_seq const *seq);
122
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
123 124
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
125
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
126
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
P
Per Liden 已提交
127

128 129 130
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
P
Per Liden 已提交
131
static struct proto tipc_proto;
132 133
static const struct rhashtable_params tsk_rht_params;

134 135 136 137 138
/* tsk_own_node - own node address recorded in the socket's header template */
static u32 tsk_own_node(struct tipc_sock *tsk)
{
	struct tipc_msg *hdr = &tsk->phdr;

	return msg_prevnode(hdr);
}

139
static u32 tsk_peer_node(struct tipc_sock *tsk)
140
{
141
	return msg_destnode(&tsk->phdr);
142 143
}

144
static u32 tsk_peer_port(struct tipc_sock *tsk)
145
{
146
	return msg_destport(&tsk->phdr);
147 148
}

149
static  bool tsk_unreliable(struct tipc_sock *tsk)
150
{
151
	return msg_src_droppable(&tsk->phdr) != 0;
152 153
}

154
/* tsk_set_unreliable - control whether sent messages may be dropped at source */
static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
{
	int val = unreliable ? 1 : 0;

	msg_set_src_droppable(&tsk->phdr, val);
}

159
static bool tsk_unreturnable(struct tipc_sock *tsk)
160
{
161
	return msg_dest_droppable(&tsk->phdr) != 0;
162 163
}

164
/* tsk_set_unreturnable - control whether undeliverable messages are returned */
static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
{
	int val = unreturnable ? 1 : 0;

	msg_set_dest_droppable(&tsk->phdr, val);
}

169
static int tsk_importance(struct tipc_sock *tsk)
170
{
171
	return msg_importance(&tsk->phdr);
172 173
}

174
static int tsk_set_importance(struct tipc_sock *tsk, int imp)
175 176 177
{
	if (imp > TIPC_CRITICAL_IMPORTANCE)
		return -EINVAL;
178
	msg_set_importance(&tsk->phdr, (u32)imp);
179 180
	return 0;
}
181

182 183 184 185 186
/* tipc_sk - cast a generic sock to its enclosing tipc_sock */
static struct tipc_sock *tipc_sk(const struct sock *sk)
{
	return container_of(sk, struct tipc_sock, sk);
}

187
static bool tsk_conn_cong(struct tipc_sock *tsk)
188
{
189
	return tsk->snt_unacked > tsk->snd_win;
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
}

/* tsk_adv_blocks - translate a buffer size in bytes to number of
 * advertisable blocks, taking into account the ratio truesize(len)/len.
 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ.
 */
static u16 tsk_adv_blocks(int len)
{
	int blks = len / FLOWCTL_BLK_SZ;

	return blks / 4;
}

/* tsk_inc - increment counter for sent or received data
 * - If block based flow control is not supported by peer we
 *   fall back to message based ditto, incrementing the counter
 */
static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
{
	if (!(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return 1;
	return (msglen / FLOWCTL_BLK_SZ) + 1;
}

212
/**
213
 * tsk_advance_rx_queue - discard first buffer in socket receive queue
214 215
 *
 * Caller must hold socket lock
P
Per Liden 已提交
216
 */
217
static void tsk_advance_rx_queue(struct sock *sk)
P
Per Liden 已提交
218
{
219
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
P
Per Liden 已提交
220 221
}

222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
/* tipc_sk_respond() : send response message back to sender */
static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
{
	u32 onode = tipc_own_addr(sock_net(sk));
	struct tipc_msg *hdr;
	u32 dnode, selector;

	/* Reverse the message; a non-reversible message is consumed here */
	if (!tipc_msg_reverse(onode, &skb, err))
		return;

	hdr = buf_msg(skb);
	dnode = msg_destnode(hdr);
	selector = msg_origport(hdr);
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
}

P
Per Liden 已提交
238
/**
239
 * tsk_rej_rx_queue - reject all buffers in socket receive queue
240 241
 *
 * Caller must hold socket lock
P
Per Liden 已提交
242
 */
243
static void tsk_rej_rx_queue(struct sock *sk)
P
Per Liden 已提交
244
{
245
	struct sk_buff *skb;
246

247 248
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
		tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
P
Per Liden 已提交
249 250
}

251 252
/* tipc_sk_connected - check if the socket is in connected state
 * @sk: socket
 *
 * Returns true if connection established, false otherwise
 */
static bool tipc_sk_connected(struct sock *sk)
{
	return sk->sk_state == TIPC_ESTABLISHED;
}

256 257 258 259 260 261 262 263 264 265
/* tipc_sk_type_connectionless - check if the socket is datagram socket
 * @sk: socket
 *
 * Returns true if connection less, false otherwise
 */
static bool tipc_sk_type_connectionless(struct sock *sk)
{
	int typ = sk->sk_type;

	return typ == SOCK_RDM || typ == SOCK_DGRAM;
}

266
/* tsk_peer_msg - verify if message was sent by connected port's peer
 *
 * Handles cases where the node's network address has changed from
 * the default of <0.0.0> to its configured setting.
 */
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
	u32 peer_port = tsk_peer_port(tsk);
	u32 orig_node;
	u32 peer_node;

	/* Only an established connection has a valid peer to compare with */
	if (unlikely(!tipc_sk_connected(sk)))
		return false;

	if (unlikely(msg_origport(msg) != peer_port))
		return false;

	orig_node = msg_orignode(msg);
	peer_node = tsk_peer_node(tsk);

	if (likely(orig_node == peer_node))
		return true;

	/* One side may still use the pre-configuration address 0 while the
	 * other already holds this node's configured address
	 */
	if (!orig_node && (peer_node == tn->own_addr))
		return true;

	if (!peer_node && (orig_node == tn->own_addr))
		return true;

	return false;
}

300 301 302 303 304 305 306 307 308
/* tipc_set_sk_state - set the sk_state of the socket
 * @sk: socket
 *
 * Caller must hold socket lock
 *
 * Only the transitions listed below are accepted; any other combination
 * of old and new state leaves sk_state untouched.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_set_sk_state(struct sock *sk, int state)
{
	int oldsk_state = sk->sk_state;
	int res = -EINVAL;

	switch (state) {
	case TIPC_OPEN:
		/* Always allowed, regardless of previous state */
		res = 0;
		break;
	case TIPC_LISTEN:
	case TIPC_CONNECTING:
		if (oldsk_state == TIPC_OPEN)
			res = 0;
		break;
	case TIPC_ESTABLISHED:
		if (oldsk_state == TIPC_CONNECTING ||
		    oldsk_state == TIPC_OPEN)
			res = 0;
		break;
	case TIPC_DISCONNECTING:
		if (oldsk_state == TIPC_CONNECTING ||
		    oldsk_state == TIPC_ESTABLISHED)
			res = 0;
		break;
	}

	if (!res)
		sk->sk_state = state;

	return res;
}

339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381
/* tipc_sk_sock_err - check socket for a pending error or expired timeout
 *
 * Returns 0 if the caller may keep waiting, errno otherwise
 */
static int tipc_sk_sock_err(struct socket *sock, long *timeout)
{
	struct sock *sk = sock->sk;
	int err = sock_error(sk);

	if (err)
		return err;
	if (sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET) {
		if (sk->sk_state == TIPC_DISCONNECTING)
			return -EPIPE;
		if (!tipc_sk_connected(sk))
			return -ENOTCONN;
	}
	if (!*timeout)
		return -EAGAIN;
	if (signal_pending(current))
		return sock_intr_errno(*timeout);
	return 0;
}

/* tipc_wait_for_cond - wait until condition_ holds, a socket error occurs,
 * or the timeout expires; evaluates to 0 on success, errno otherwise.
 *
 * Fix: dereference the macro parameter sock_ instead of a variable literally
 * named 'sock' - the original only compiled because every caller happened to
 * name its local variable 'sock'. Arguments are parenthesized for hygiene.
 */
#define tipc_wait_for_cond(sock_, timeout_, condition_)			\
({								        \
	int rc_ = 0;							\
	int done_ = 0;							\
									\
	while (!(condition_) && !done_) {				\
		struct sock *sk_ = (sock_)->sk;				\
		DEFINE_WAIT_FUNC(wait_, woken_wake_function);		\
									\
		rc_ = tipc_sk_sock_err((sock_), (timeout_));		\
		if (rc_)						\
			break;						\
		prepare_to_wait(sk_sleep(sk_), &wait_,			\
				TASK_INTERRUPTIBLE);			\
		done_ = sk_wait_event(sk_, (timeout_),			\
				      (condition_), &wait_);		\
		remove_wait_queue(sk_sleep(sk_), &wait_);		\
	}								\
	rc_;								\
})

P
Per Liden 已提交
382
/**
 * tipc_sk_create - create a TIPC socket
 * @net: network namespace (must be default network)
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
 * @kern: caused by kernel or by userspace?
 *
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
{
	struct tipc_net *tn;
	const struct proto_ops *ops;
	struct sock *sk;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;

	/* Validate arguments */
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	/* Select the proto_ops vector matching the requested socket type */
	switch (sock->type) {
	case SOCK_STREAM:
		ops = &stream_ops;
		break;
	case SOCK_SEQPACKET:
		ops = &packet_ops;
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
		ops = &msg_ops;
		break;
	default:
		return -EPROTOTYPE;
	}

	/* Allocate socket's protocol area */
	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
	if (sk == NULL)
		return -ENOMEM;

	/* Pre-build the header template used for every outgoing message */
	tsk = tipc_sk(sk);
	tsk->max_pkt = MAX_PKT_DEFAULT;
	INIT_LIST_HEAD(&tsk->publications);
	INIT_LIST_HEAD(&tsk->cong_links);
	msg = &tsk->phdr;
	tn = net_generic(sock_net(sk), tipc_net_id);
	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
		      NAMED_H_SIZE, 0);

	/* Finish initializing socket data structures */
	sock->ops = ops;
	sock_init_data(sock, sk);
	tipc_set_sk_state(sk, TIPC_OPEN);
	if (tipc_sk_insert(tsk)) {
		/* NOTE(review): sk allocated above is not released on this
		 * error path - looks like a leak; confirm against upstream.
		 */
		pr_warn("Socket create failed; port number exhausted\n");
		return -EINVAL;
	}
	msg_set_origport(msg, tsk->portid);
	setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
	sk->sk_shutdown = 0;
	sk->sk_backlog_rcv = tipc_backlog_rcv;
	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
	sk->sk_data_ready = tipc_data_ready;
	sk->sk_write_space = tipc_write_space;
	sk->sk_destruct = tipc_sock_destruct;
	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
	atomic_set(&tsk->dupl_rcvcnt, 0);

	/* Start out with safe limits until we receive an advertised window */
	tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
	tsk->rcv_win = tsk->snd_win;

	/* Datagram sockets never return undeliverable messages; pure
	 * SOCK_DGRAM additionally allows dropping at the source
	 */
	if (tipc_sk_type_connectionless(sk)) {
		tsk_set_unreturnable(tsk, true);
		if (sock->type == SOCK_DGRAM)
			tsk_set_unreliable(tsk, true);
	}

	return 0;
}

468 469 470 471 472 473 474
/* tipc_sk_callback - RCU callback releasing the final socket reference */
static void tipc_sk_callback(struct rcu_head *head)
{
	struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);

	sock_put(&tsk->sk);
}

475 476 477 478 479 480
/* __tipc_shutdown - shut down a socket's connection, rejecting queued input
 * @sock: socket to shut down
 * @error: TIPC error code carried by rejected/abort messages
 *
 * Caller should hold socket lock for the socket.
 */
static void __tipc_shutdown(struct socket *sock, int error)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	long timeout = CONN_TIMEOUT_DEFAULT;
	u32 dnode = tsk_peer_node(tsk);
	struct sk_buff *skb;

	/* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */
	tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
					    !tsk_conn_cong(tsk)));

	/* Reject all unreceived messages, except on an active connection
	 * (which disconnects locally & sends a 'FIN+' to peer).
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* Partially read messages are just discarded */
		if (TIPC_SKB_CB(skb)->bytes_read) {
			kfree_skb(skb);
			continue;
		}
		if (!tipc_sk_type_connectionless(sk) &&
		    sk->sk_state != TIPC_DISCONNECTING) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			tipc_node_remove_conn(net, dnode, tsk->portid);
		}
		tipc_sk_respond(sk, skb, error);
	}

	/* Connectionless sockets have no peer to notify */
	if (tipc_sk_type_connectionless(sk))
		return;

	/* Still connected: send an abort/FIN message to the peer */
	if (sk->sk_state != TIPC_DISCONNECTING) {
		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
				      TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
				      tsk_own_node(tsk), tsk_peer_port(tsk),
				      tsk->portid, error);
		if (skb)
			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
		tipc_node_remove_conn(net, dnode, tsk->portid);
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
	}
}

P
Per Liden 已提交
520
/**
 * tipc_release - destroy a TIPC socket
 * @sock: socket to destroy
 *
 * This routine cleans up any messages that are still queued on the socket.
 * For DGRAM and RDM socket types, all queued messages are rejected.
 * For SEQPACKET and STREAM socket types, the first message is rejected
 * and any others are discarded.  (If the first message on a STREAM socket
 * is partially-read, it is discarded and the next one is rejected instead.)
 *
 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 * are returned or discarded according to the "destination droppable" setting
 * specified for the message by the sender.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk;

	/*
	 * Exit if socket isn't fully initialized (occurs when a failed accept()
	 * releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
		return 0;

	tsk = tipc_sk(sk);
	lock_sock(sk);

	/* Tear down any connection and unbind all published names */
	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
	sk->sk_shutdown = SHUTDOWN_MASK;
	tipc_sk_withdraw(tsk, 0, NULL);
	sk_stop_timer(sk, &sk->sk_timer);
	tipc_sk_remove(tsk);

	/* Reject any messages that accumulated in backlog queue */
	release_sock(sk);
	u32_list_purge(&tsk->cong_links);
	tsk->cong_link_cnt = 0;
	/* Actual freeing is deferred past the RCU grace period */
	call_rcu(&tsk->rcu, tipc_sk_callback);
	sock->sk = NULL;

	return 0;
}

/**
 * tipc_bind - associate or disassociate TIPC name(s) with a socket
 * @sock: socket structure
 * @uaddr: socket address describing name(s) and desired operation
 * @uaddr_len: size of socket address data structure
 *
 * Name and name sequence binding is indicated using a positive scope value;
 * a negative scope value unbinds the specified name.  Specifying no name
 * (i.e. a socket address length of 0) unbinds all names from the socket.
 *
 * Returns 0 on success, errno otherwise
 *
 * NOTE(review): an earlier version of this comment claimed no socket lock
 * was needed, but the code below takes and releases the socket lock.
 */
static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
		     int uaddr_len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
	struct tipc_sock *tsk = tipc_sk(sk);
	int res = -EINVAL;

	lock_sock(sk);
	/* Zero-length address: withdraw every name bound to this socket */
	if (unlikely(!uaddr_len)) {
		res = tipc_sk_withdraw(tsk, 0, NULL);
		goto exit;
	}

	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
		res = -EINVAL;
		goto exit;
	}
	if (addr->family != AF_TIPC) {
		res = -EAFNOSUPPORT;
		goto exit;
	}

	/* A single name is treated as a sequence of length one */
	if (addr->addrtype == TIPC_ADDR_NAME)
		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
	else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
		res = -EAFNOSUPPORT;
		goto exit;
	}

	/* Reserved name types may not be bound by user sockets */
	if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
	    (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
	    (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
		res = -EACCES;
		goto exit;
	}

	/* Positive scope publishes the name, negative scope withdraws it */
	res = (addr->scope > 0) ?
		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
exit:
	release_sock(sk);
	return res;
}

627
/**
628
 * tipc_getname - get port ID of socket or peer socket
P
Per Liden 已提交
629 630 631
 * @sock: socket structure
 * @uaddr: area for returned socket address
 * @uaddr_len: area for returned length of socket address
632
 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
633
 *
P
Per Liden 已提交
634
 * Returns 0 on success, errno otherwise
635
 *
636 637
 * NOTE: This routine doesn't need to take the socket lock since it only
 *       accesses socket information that is unchanging (or which changes in
638
 *       a completely predictable manner).
P
Per Liden 已提交
639
 */
640 641
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
			int *uaddr_len, int peer)
P
Per Liden 已提交
642 643
{
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
644 645
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
646
	struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
P
Per Liden 已提交
647

648
	memset(addr, 0, sizeof(*addr));
649
	if (peer) {
650
		if ((!tipc_sk_connected(sk)) &&
651
		    ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
652
			return -ENOTCONN;
653 654
		addr->addr.id.ref = tsk_peer_port(tsk);
		addr->addr.id.node = tsk_peer_node(tsk);
655
	} else {
656
		addr->addr.id.ref = tsk->portid;
657
		addr->addr.id.node = tn->own_addr;
658
	}
P
Per Liden 已提交
659 660 661 662 663 664 665

	*uaddr_len = sizeof(*addr);
	addr->addrtype = TIPC_ADDR_ID;
	addr->family = AF_TIPC;
	addr->scope = 0;
	addr->addr.name.domain = 0;

666
	return 0;
P
Per Liden 已提交
667 668 669
}

/**
 * tipc_poll - read and possibly block on pollmask
 * @file: file structure associated with the socket
 * @sock: socket for which to calculate the poll bits
 * @wait: poll table, registered with the socket's wait queue
 *
 * Returns pollmask value
 *
 * COMMENTARY:
 * It appears that the usual socket locking mechanisms are not useful here
 * since the pollmask info is potentially out-of-date the moment this routine
 * exits.  TCP and other protocols seem to rely on higher level poll routines
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 * imply that the operation will succeed, merely that it should be performed
 * and will not block.
 */
static unsigned int tipc_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	u32 mask = 0;

	sock_poll_wait(file, sk_sleep(sk), wait);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	switch (sk->sk_state) {
	case TIPC_ESTABLISHED:
		/* Writable only when neither links nor peer are congested */
		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
			mask |= POLLOUT;
		/* fall thru' - established sockets are also readable */
	case TIPC_LISTEN:
	case TIPC_CONNECTING:
		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= (POLLIN | POLLRDNORM);
		break;
	case TIPC_OPEN:
		if (!tsk->cong_link_cnt)
			mask |= POLLOUT;
		/* Unconnected sockets are readable only if connectionless */
		if (tipc_sk_type_connectionless(sk) &&
		    (!skb_queue_empty(&sk->sk_receive_queue)))
			mask |= (POLLIN | POLLRDNORM);
		break;
	case TIPC_DISCONNECTING:
		mask = (POLLIN | POLLRDNORM | POLLHUP);
		break;
	}

	return mask;
}

726 727 728 729
/**
 * tipc_sendmcast - send multicast message
 * @sock: socket structure
 * @seq: destination address
 * @msg: message to send
 * @dlen: length of data to send
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
			  struct msghdr *msg, size_t dlen, long timeout)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	int mtu = tipc_bcast_get_mtu(net);
	struct tipc_mc_method *method = &tsk->mc_method;
	u32 domain = addr_domain(net, TIPC_CLUSTER_SCOPE);
	struct sk_buff_head pkts;
	struct tipc_nlist dsts;
	int rc;

	/* Block or return if any destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;

	/* Lookup destination nodes */
	tipc_nlist_init(&dsts, tipc_own_addr(net));
	tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
				      seq->upper, domain, &dsts);
	if (!dsts.local && !dsts.remote)
		return -EHOSTUNREACH;

	/* Build message header */
	msg_set_type(hdr, TIPC_MCAST_MSG);
	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_nametype(hdr, seq->type);
	msg_set_namelower(hdr, seq->lower);
	msg_set_nameupper(hdr, seq->upper);

	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);

	/* Send message if build was successful */
	if (unlikely(rc == dlen))
		rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
				     &tsk->cong_link_cnt);

	tipc_nlist_purge(&dsts);

	return rc ? rc : dlen;
}

787 788 789 790 791 792
/**
 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
 * @arrvq: queue with arriving messages, to be cloned after destination lookup
 * @inputq: queue with cloned messages, delivered to socket after dest lookup
 *
 * Multi-threaded: parallel calls with reference to same queues may occur
 */
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
		       struct sk_buff_head *inputq)
{
	struct tipc_msg *msg;
	struct list_head dports;
	u32 portid;
	u32 scope = TIPC_CLUSTER_SCOPE;
	struct sk_buff_head tmpq;
	uint hsz;
	struct sk_buff *skb, *_skb;

	__skb_queue_head_init(&tmpq);
	INIT_LIST_HEAD(&dports);

	/* inputq->lock also guards arrvq, see tipc_skb_peek() */
	skb = tipc_skb_peek(arrvq, &inputq->lock);
	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
		msg = buf_msg(skb);
		hsz = skb_headroom(skb) + msg_hdr_sz(msg);

		/* Messages from the own node may match node-scope bindings */
		if (in_own_node(net, msg_orignode(msg)))
			scope = TIPC_NODE_SCOPE;

		/* Create destination port list and message clones: */
		tipc_nametbl_mc_translate(net,
					  msg_nametype(msg), msg_namelower(msg),
					  msg_nameupper(msg), scope, &dports);
		portid = u32_pop(&dports);
		for (; portid; portid = u32_pop(&dports)) {
			_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
			if (_skb) {
				msg_set_destport(buf_msg(_skb), portid);
				__skb_queue_tail(&tmpq, _skb);
				continue;
			}
			pr_warn("Failed to clone mcast rcv buffer\n");
		}
		/* Append to inputq if not already done by other thread */
		spin_lock_bh(&inputq->lock);
		if (skb_peek(arrvq) == skb) {
			skb_queue_splice_tail_init(&tmpq, inputq);
			kfree_skb(__skb_dequeue(arrvq));
		}
		spin_unlock_bh(&inputq->lock);
		/* tmpq is empty here unless another thread won the race */
		__skb_queue_purge(&tmpq);
		kfree_skb(skb);
	}
	tipc_sk_rcv(net, inputq);
}

843 844 845
/**
 * tipc_sk_proto_rcv - receive a connection mng protocol message
 * @tsk: receiving socket
 * @skb: pointer to message buffer.
 * @xmitq: queue for messages to be transmitted by the caller
 *
 * Consumes @skb unless it is re-queued on @xmitq as a probe reply.
 */
static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
			      struct sk_buff_head *xmitq)
{
	struct sock *sk = &tsk->sk;
	u32 onode = tsk_own_node(tsk);
	struct tipc_msg *hdr = buf_msg(skb);
	int mtyp = msg_type(hdr);
	bool conn_cong;

	/* Ignore if connection cannot be validated: */
	if (!tsk_peer_msg(tsk, hdr))
		goto exit;

	/* Any valid peer message proves the peer is alive */
	tsk->probe_unacked = false;

	if (mtyp == CONN_PROBE) {
		/* Turn the probe around and hand it back for transmission */
		msg_set_type(hdr, CONN_PROBE_REPLY);
		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
			__skb_queue_tail(xmitq, skb);
		return;
	} else if (mtyp == CONN_ACK) {
		conn_cong = tsk_conn_cong(tsk);
		tsk->snt_unacked -= msg_conn_ack(hdr);
		if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
			tsk->snd_win = msg_adv_win(hdr);
		/* Wake up writers if the ack relieved congestion */
		if (conn_cong)
			sk->sk_write_space(sk);
	} else if (mtyp != CONN_PROBE_REPLY) {
		pr_warn("Received unknown CONN_PROTO msg\n");
	}
exit:
	kfree_skb(skb);
}

P
Per Liden 已提交
882
/**
883
 * tipc_sendmsg - send message in connectionless manner
P
Per Liden 已提交
884 885
 * @sock: socket structure
 * @m: message to send
886
 * @dsz: amount of user data to be sent
887
 *
P
Per Liden 已提交
888
 * Message must have an destination specified explicitly.
889
 * Used for SOCK_RDM and SOCK_DGRAM messages,
P
Per Liden 已提交
890 891
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
892
 *
P
Per Liden 已提交
893 894
 * Returns the number of bytes sent on success, or errno otherwise
 */
895
static int tipc_sendmsg(struct socket *sock,
896
			struct msghdr *m, size_t dsz)
897 898 899 900 901 902 903 904 905 906 907
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_sendmsg(sock, m, dsz);
	release_sock(sk);

	return ret;
}

908
/* __tipc_sendmsg - send a message; caller holds the socket lock
 *
 * Fixes relative to the previous version:
 *  - The implicit-destination path (msg_name == NULL, fall back to the
 *    peer stored by connect() on RDM/DGRAM sockets) was unreachable: it
 *    rejected connectionless sockets with -EDESTADDRREQ, and even a
 *    surviving caller would then have hit the msg_namelen < sizeof(*dest)
 *    check (msg_namelen is 0 when no address was supplied) and got
 *    -EINVAL. Address validation now applies only to user-supplied
 *    addresses.
 *  - An unknown dest->addrtype previously fell through and used an
 *    uninitialized 'dnode'; it is now rejected with -EINVAL.
 */
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct list_head *clinks = &tsk->cong_links;
	bool syn = !tipc_sk_type_connectionless(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq *seq;
	struct sk_buff_head pkts;
	u32 type, inst, domain;
	u32 dnode, dport;
	int mtu, rc;

	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
		return -EMSGSIZE;

	if (likely(dest)) {
		/* Validate only user-supplied addresses */
		if (unlikely(m->msg_namelen < sizeof(*dest)))
			return -EINVAL;
		if (unlikely(dest->family != AF_TIPC))
			return -EINVAL;
	} else {
		/* Fall back to the destination saved by a prior connect();
		 * only connectionless sockets store one (see tipc_connect()).
		 */
		dest = &tsk->peer;
		if (syn || dest->family != AF_TIPC)
			return -EDESTADDRREQ;
	}

	if (unlikely(syn)) {
		if (sk->sk_state == TIPC_LISTEN)
			return -EPIPE;
		if (sk->sk_state != TIPC_OPEN)
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
		if (dest->addrtype == TIPC_ADDR_NAME) {
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
		}
	}

	seq = &dest->addr.nameseq;
	if (dest->addrtype == TIPC_ADDR_MCAST)
		return tipc_sendmcast(sock, seq, m, dlen, timeout);

	if (dest->addrtype == TIPC_ADDR_NAME) {
		type = dest->addr.name.name.type;
		inst = dest->addr.name.name.instance;
		domain = dest->addr.name.domain;
		dnode = domain;
		msg_set_type(hdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
		msg_set_nametype(hdr, type);
		msg_set_nameinst(hdr, inst);
		msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
		dport = tipc_nametbl_translate(net, type, inst, &dnode);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dport);
		if (unlikely(!dport && !dnode))
			return -EHOSTUNREACH;
	} else if (dest->addrtype == TIPC_ADDR_ID) {
		dnode = dest->addr.id.node;
		msg_set_type(hdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(hdr, 0);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dest->addr.id.ref);
		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
	} else {
		/* Unknown address type: reject instead of transmitting
		 * with an uninitialized destination node.
		 */
		return -EINVAL;
	}

	/* Block or return if destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode));
	if (unlikely(rc))
		return rc;

	skb_queue_head_init(&pkts);
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
		/* Link is congested: remember it and report success;
		 * the data has been accepted for deferred delivery.
		 */
		u32_push(clinks, dnode);
		tsk->cong_link_cnt++;
		rc = 0;
	}

	/* A successfully sent SYN moves the socket to CONNECTING */
	if (unlikely(syn && !rc))
		tipc_set_sk_state(sk, TIPC_CONNECTING);

	return rc ? rc : dlen;
}

1005
/**
1006
 * tipc_sendstream - send stream-oriented data
P
Per Liden 已提交
1007
 * @sock: socket structure
1008 1009
 * @m: data to send
 * @dsz: total length of data to be transmitted
1010
 *
1011
 * Used for SOCK_STREAM data.
1012
 *
1013 1014
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
P
Per Liden 已提交
1015
 */
1016
static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
1017 1018 1019 1020 1021
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
1022
	ret = __tipc_sendstream(sock, m, dsz);
1023 1024 1025 1026 1027
	release_sock(sk);

	return ret;
}

1028
/* __tipc_sendstream - send stream data in chunks; caller holds socket lock */
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	struct sk_buff_head pkts;
	u32 dnode = tsk_peer_node(tsk);
	int send, sent = 0;
	int rc = 0;

	skb_queue_head_init(&pkts);

	if (unlikely(dlen > INT_MAX))
		return -EMSGSIZE;

	/* A destination address on a stream socket means implicit connect:
	 * delegate to __tipc_sendmsg(), which sends the SYN (+ data).
	 */
	if (unlikely(dest)) {
		rc = __tipc_sendmsg(sock, m, dlen);
		if (dlen && (dlen == rc))
			tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
		return rc;
	}

	do {
		/* Wait until neither the link nor the peer is congested */
		rc = tipc_wait_for_cond(sock, &timeout,
					(!tsk->cong_link_cnt &&
					 !tsk_conn_cong(tsk) &&
					 tipc_sk_connected(sk)));
		if (unlikely(rc))
			break;

		/* Build and transmit the next chunk of at most one message */
		send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
		rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
		if (unlikely(rc != send))
			break;

		rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
		if (unlikely(rc == -ELINKCONG)) {
			/* Congestion is not an error; just note it */
			tsk->cong_link_cnt = 1;
			rc = 0;
		}
		if (likely(!rc)) {
			tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
			sent += send;
		}
	} while (sent < dlen && !rc);

	return rc ? rc : sent;
}

1081
/**
1082
 * tipc_send_packet - send a connection-oriented message
P
Per Liden 已提交
1083
 * @sock: socket structure
1084 1085
 * @m: message to send
 * @dsz: length of data to be transmitted
1086
 *
1087
 * Used for SOCK_SEQPACKET messages.
1088
 *
1089
 * Returns the number of bytes sent on success, or errno otherwise
P
Per Liden 已提交
1090
 */
1091
static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
P
Per Liden 已提交
1092
{
1093 1094
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;
P
Per Liden 已提交
1095

1096
	return tipc_sendstream(sock, m, dsz);
P
Per Liden 已提交
1097 1098
}

1099
/* tipc_sk_finish_conn - complete the setup of a connection
P
Per Liden 已提交
1100
 */
1101
static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1102
				u32 peer_node)
P
Per Liden 已提交
1103
{
1104 1105
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
1106
	struct tipc_msg *msg = &tsk->phdr;
P
Per Liden 已提交
1107

1108 1109 1110 1111 1112
	msg_set_destnode(msg, peer_node);
	msg_set_destport(msg, peer_port);
	msg_set_type(msg, TIPC_CONN_MSG);
	msg_set_lookup_scope(msg, 0);
	msg_set_hdr_sz(msg, SHORT_H_SIZE);
1113

1114
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
1115
	tipc_set_sk_state(sk, TIPC_ESTABLISHED);
1116 1117
	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
1118
	tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
1119 1120 1121 1122 1123 1124
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
		return;

	/* Fall back to message based flow control */
	tsk->rcv_win = FLOWCTL_MSG_WIN;
	tsk->snd_win = FLOWCTL_MSG_WIN;
P
Per Liden 已提交
1125 1126 1127 1128 1129 1130
}

/**
 * set_orig_addr - capture sender's address for received message
 * @m: descriptor for message info
 * @msg: received message header
1131
 *
P
Per Liden 已提交
1132 1133
 * Note: Address is not captured if not requested by receiver.
 */
S
Sam Ravnborg 已提交
1134
static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
P
Per Liden 已提交
1135
{
1136
	DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);
P
Per Liden 已提交
1137

1138
	if (addr) {
P
Per Liden 已提交
1139 1140
		addr->family = AF_TIPC;
		addr->addrtype = TIPC_ADDR_ID;
1141
		memset(&addr->addr, 0, sizeof(addr->addr));
P
Per Liden 已提交
1142 1143
		addr->addr.id.ref = msg_origport(msg);
		addr->addr.id.node = msg_orignode(msg);
1144 1145
		addr->addr.name.domain = 0;	/* could leave uninitialized */
		addr->scope = 0;		/* could leave uninitialized */
P
Per Liden 已提交
1146 1147 1148 1149 1150
		m->msg_namelen = sizeof(struct sockaddr_tipc);
	}
}

/**
1151
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
P
Per Liden 已提交
1152 1153
 * @m: descriptor for message info
 * @msg: received message header
1154
 * @tsk: TIPC port associated with message
1155
 *
P
Per Liden 已提交
1156
 * Note: Ancillary data is not captured if not requested by receiver.
1157
 *
P
Per Liden 已提交
1158 1159
 * Returns 0 if successful, otherwise errno
 */
1160 1161
static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
				 struct tipc_sock *tsk)
P
Per Liden 已提交
1162 1163 1164 1165
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
1166
	int has_name;
P
Per Liden 已提交
1167 1168 1169 1170 1171 1172 1173 1174 1175 1176
	int res;

	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
1177 1178
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
P
Per Liden 已提交
1179
			return res;
1180 1181 1182 1183 1184 1185
		if (anc_data[1]) {
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
P
Per Liden 已提交
1186 1187 1188 1189 1190 1191
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
1192
		has_name = 1;
P
Per Liden 已提交
1193 1194 1195 1196 1197
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
1198
		has_name = 1;
P
Per Liden 已提交
1199 1200 1201 1202 1203
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
1204 1205 1206 1207
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
P
Per Liden 已提交
1208 1209
		break;
	default:
1210
		has_name = 0;
P
Per Liden 已提交
1211
	}
1212 1213 1214 1215 1216
	if (has_name) {
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}
P
Per Liden 已提交
1217 1218 1219 1220

	return 0;
}

1221
static void tipc_sk_send_ack(struct tipc_sock *tsk)
1222
{
1223 1224
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
1225
	struct sk_buff *skb = NULL;
1226
	struct tipc_msg *msg;
1227 1228
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);
1229

1230
	if (!tipc_sk_connected(sk))
1231
		return;
1232 1233 1234
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
1235
	if (!skb)
1236
		return;
1237
	msg = buf_msg(skb);
1238 1239 1240 1241 1242 1243 1244 1245
	msg_set_conn_ack(msg, tsk->rcv_unacked);
	tsk->rcv_unacked = 0;

	/* Adjust to and advertize the correct window limit */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
		tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
		msg_set_adv_win(msg, tsk->rcv_win);
	}
1246
	tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
1247 1248
}

1249
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
Y
Ying Xue 已提交
1250 1251 1252
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
1253
	long timeo = *timeop;
Y
Ying Xue 已提交
1254 1255 1256 1257
	int err;

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1258
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
1259
			if (sk->sk_shutdown & RCV_SHUTDOWN) {
Y
Ying Xue 已提交
1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272
				err = -ENOTCONN;
				break;
			}
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
1273 1274 1275
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
Y
Ying Xue 已提交
1276 1277
	}
	finish_wait(sk_sleep(sk), &wait);
1278
	*timeop = timeo;
Y
Ying Xue 已提交
1279 1280 1281
	return err;
}

1282
/**
1283
 * tipc_recvmsg - receive packet-oriented message
P
Per Liden 已提交
1284 1285 1286
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
1287
 *
P
Per Liden 已提交
1288 1289 1290 1291 1292
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
1293 1294
static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
			int flags)
P
Per Liden 已提交
1295
{
1296
	struct sock *sk = sock->sk;
1297
	struct tipc_sock *tsk = tipc_sk(sk);
P
Per Liden 已提交
1298 1299
	struct sk_buff *buf;
	struct tipc_msg *msg;
1300
	bool is_connectionless = tipc_sk_type_connectionless(sk);
Y
Ying Xue 已提交
1301
	long timeo;
P
Per Liden 已提交
1302 1303
	unsigned int sz;
	u32 err;
1304
	int res, hlen;
P
Per Liden 已提交
1305

1306
	/* Catch invalid receive requests */
P
Per Liden 已提交
1307 1308 1309
	if (unlikely(!buf_len))
		return -EINVAL;

1310
	lock_sock(sk);
P
Per Liden 已提交
1311

1312
	if (!is_connectionless && unlikely(sk->sk_state == TIPC_OPEN)) {
1313
		res = -ENOTCONN;
P
Per Liden 已提交
1314 1315 1316
		goto exit;
	}

Y
Ying Xue 已提交
1317
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1318
restart:
P
Per Liden 已提交
1319

1320
	/* Look for a message in receive queue; wait if necessary */
1321
	res = tipc_wait_for_rcvmsg(sock, &timeo);
Y
Ying Xue 已提交
1322 1323
	if (res)
		goto exit;
P
Per Liden 已提交
1324

1325 1326
	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
P
Per Liden 已提交
1327 1328
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
1329
	hlen = msg_hdr_sz(msg);
P
Per Liden 已提交
1330 1331 1332 1333
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
1334
		tsk_advance_rx_queue(sk);
P
Per Liden 已提交
1335 1336 1337 1338 1339 1340 1341
		goto restart;
	}

	/* Capture sender's address (optional) */
	set_orig_addr(m, msg);

	/* Capture ancillary data (optional) */
1342
	res = tipc_sk_anc_data_recv(m, msg, tsk);
1343
	if (res)
P
Per Liden 已提交
1344 1345 1346 1347 1348 1349 1350 1351
		goto exit;

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
		if (unlikely(buf_len < sz)) {
			sz = buf_len;
			m->msg_flags |= MSG_TRUNC;
		}
1352
		res = skb_copy_datagram_msg(buf, hlen, m, sz);
1353
		if (res)
P
Per Liden 已提交
1354 1355 1356
			goto exit;
		res = sz;
	} else {
1357 1358
		if (is_connectionless || err == TIPC_CONN_SHUTDOWN ||
		    m->msg_control)
P
Per Liden 已提交
1359 1360 1361 1362 1363
			res = 0;
		else
			res = -ECONNRESET;
	}

1364 1365 1366
	if (unlikely(flags & MSG_PEEK))
		goto exit;

1367
	if (likely(!is_connectionless)) {
1368 1369 1370
		tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
		if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
			tipc_sk_send_ack(tsk);
1371
	}
1372
	tsk_advance_rx_queue(sk);
P
Per Liden 已提交
1373
exit:
1374
	release_sock(sk);
P
Per Liden 已提交
1375 1376 1377
	return res;
}

1378
/**
1379
 * tipc_recv_stream - receive stream-oriented data
P
Per Liden 已提交
1380 1381 1382
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
1383 1384
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
P
Per Liden 已提交
1385 1386 1387 1388
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
1389 1390
static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
			    size_t buf_len, int flags)
P
Per Liden 已提交
1391
{
1392
	struct sock *sk = sock->sk;
1393
	struct tipc_sock *tsk = tipc_sk(sk);
P
Per Liden 已提交
1394 1395
	struct sk_buff *buf;
	struct tipc_msg *msg;
Y
Ying Xue 已提交
1396
	long timeo;
P
Per Liden 已提交
1397
	unsigned int sz;
1398
	int target;
P
Per Liden 已提交
1399 1400
	int sz_copied = 0;
	u32 err;
1401
	int res = 0, hlen;
P
Per Liden 已提交
1402

1403
	/* Catch invalid receive attempts */
P
Per Liden 已提交
1404 1405 1406
	if (unlikely(!buf_len))
		return -EINVAL;

1407
	lock_sock(sk);
P
Per Liden 已提交
1408

1409
	if (unlikely(sk->sk_state == TIPC_OPEN)) {
1410
		res = -ENOTCONN;
P
Per Liden 已提交
1411 1412 1413
		goto exit;
	}

1414
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
Y
Ying Xue 已提交
1415
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
P
Per Liden 已提交
1416

1417
restart:
1418
	/* Look for a message in receive queue; wait if necessary */
1419
	res = tipc_wait_for_rcvmsg(sock, &timeo);
Y
Ying Xue 已提交
1420 1421
	if (res)
		goto exit;
P
Per Liden 已提交
1422

1423 1424
	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
P
Per Liden 已提交
1425 1426
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
1427
	hlen = msg_hdr_sz(msg);
P
Per Liden 已提交
1428 1429 1430 1431
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
1432
		tsk_advance_rx_queue(sk);
P
Per Liden 已提交
1433 1434 1435 1436 1437 1438
		goto restart;
	}

	/* Optionally capture sender's address & ancillary data of first msg */
	if (sz_copied == 0) {
		set_orig_addr(m, msg);
1439
		res = tipc_sk_anc_data_recv(m, msg, tsk);
1440
		if (res)
P
Per Liden 已提交
1441 1442 1443 1444 1445
			goto exit;
	}

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
1446 1447 1448
		u32 offset = TIPC_SKB_CB(buf)->bytes_read;
		u32 needed;
		int sz_to_copy;
P
Per Liden 已提交
1449

1450
		sz -= offset;
P
Per Liden 已提交
1451
		needed = (buf_len - sz_copied);
1452
		sz_to_copy = min(sz, needed);
1453

1454
		res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
1455
		if (res)
P
Per Liden 已提交
1456
			goto exit;
1457

P
Per Liden 已提交
1458 1459 1460 1461
		sz_copied += sz_to_copy;

		if (sz_to_copy < sz) {
			if (!(flags & MSG_PEEK))
1462 1463
				TIPC_SKB_CB(buf)->bytes_read =
					offset + sz_to_copy;
P
Per Liden 已提交
1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475
			goto exit;
		}
	} else {
		if (sz_copied != 0)
			goto exit; /* can't add error msg to valid data */

		if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
			res = 0;
		else
			res = -ECONNRESET;
	}

1476 1477 1478 1479 1480 1481 1482
	if (unlikely(flags & MSG_PEEK))
		goto exit;

	tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
	if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
		tipc_sk_send_ack(tsk);
	tsk_advance_rx_queue(sk);
P
Per Liden 已提交
1483 1484

	/* Loop around if more data is required */
1485 1486
	if ((sz_copied < buf_len) &&	/* didn't get all requested data */
	    (!skb_queue_empty(&sk->sk_receive_queue) ||
1487
	    (sz_copied < target)) &&	/* and more is ready or required */
1488
	    (!err))			/* and haven't reached a FIN */
P
Per Liden 已提交
1489 1490 1491
		goto restart;

exit:
1492
	release_sock(sk);
1493
	return sz_copied ? sz_copied : res;
P
Per Liden 已提交
1494 1495
}

1496 1497 1498 1499 1500 1501 1502 1503 1504 1505
/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
H
Herbert Xu 已提交
1506
	if (skwq_has_sleeper(wq))
1507 1508 1509 1510 1511 1512 1513 1514 1515 1516
		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 * @len: the length of messages
 */
1517
static void tipc_data_ready(struct sock *sk)
1518 1519 1520 1521 1522
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
H
Herbert Xu 已提交
1523
	if (skwq_has_sleeper(wq))
1524 1525 1526 1527 1528
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
						POLLRDNORM | POLLRDBAND);
	rcu_read_unlock();
}

1529 1530 1531 1532 1533
/* tipc_sock_destruct - drop any messages still queued when sk is freed */
static void tipc_sock_destruct(struct sock *sk)
{
	__skb_queue_purge(&sk->sk_receive_queue);
}

1534 1535
/**
 * filter_connect - Handle all incoming messages for a connection-based socket
 * @tsk: TIPC socket
 * @skb: pointer to message buffer. Set to NULL if buffer is consumed
 *
 * Returns true if everything ok, false otherwise
 */
static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *hdr = buf_msg(skb);

	/* Multicast traffic never belongs on a connection */
	if (unlikely(msg_mcast(hdr)))
		return false;

	switch (sk->sk_state) {
	case TIPC_CONNECTING:
		/* Accept only ACK or NACK message */
		if (unlikely(!msg_connected(hdr)))
			return false;

		/* NACK: the peer refused the connection */
		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = ECONNREFUSED;
			return true;
		}

		if (unlikely(!msg_isdata(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = EINVAL;
			return true;
		}

		tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
		msg_set_importance(&tsk->phdr, msg_importance(hdr));

		/* If 'ACK+' message, add to socket receive queue */
		if (msg_data_sz(hdr))
			return true;

		/* If empty 'ACK-' message, wake up sleeping connect() */
		if (waitqueue_active(sk_sleep(sk)))
			wake_up_interruptible(sk_sleep(sk));

		/* 'ACK-' message is neither accepted nor rejected: */
		msg_set_dest_droppable(hdr, 1);
		return false;

	case TIPC_OPEN:
	case TIPC_DISCONNECTING:
		break;
	case TIPC_LISTEN:
		/* Accept only SYN message */
		if (!msg_connected(hdr) && !(msg_errcode(hdr)))
			return true;
		break;
	case TIPC_ESTABLISHED:
		/* Accept only connection-based messages sent by peer */
		if (unlikely(!tsk_peer_msg(tsk, hdr)))
			return false;

		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			/* Let timer expire on it's own */
			tipc_node_remove_conn(net, tsk_peer_node(tsk),
					      tsk->portid);
			sk->sk_state_change(sk);
		}
		return true;
	default:
		pr_err("Unknown sk_state %u\n", sk->sk_state);
	}

	return false;
}

1611 1612 1613
/**
 * rcvbuf_limit - get proper overload limit of socket receive queue
 * @sk: socket
1614
 * @skb: message
1615
 *
1616 1617
 * For connection oriented messages, irrespective of importance,
 * default queue limit is 2 MB.
1618
 *
1619 1620
 * For connectionless messages, queue limits are based on message
 * importance as follows:
1621
 *
1622 1623 1624 1625
 * TIPC_LOW_IMPORTANCE       (2 MB)
 * TIPC_MEDIUM_IMPORTANCE    (4 MB)
 * TIPC_HIGH_IMPORTANCE      (8 MB)
 * TIPC_CRITICAL_IMPORTANCE  (16 MB)
1626 1627 1628
 *
 * Returns overload limit according to corresponding message importance
 */
1629
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1630
{
1631 1632 1633 1634 1635
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);

	if (unlikely(!msg_connected(hdr)))
		return sk->sk_rcvbuf << msg_importance(hdr);
1636

1637 1638
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return sk->sk_rcvbuf;
1639

1640
	return FLOWCTL_MSG_LIM;
1641 1642
}

1643
/**
1644 1645
 * filter_rcv - validate incoming message
 * @sk: socket
1646
 * @skb: pointer to message.
1647
 *
1648 1649 1650
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
1651
 * Called with socket lock already taken
1652
 *
1653
 * Returns true if message was added to socket receive queue, otherwise false
P
Per Liden 已提交
1654
 */
J
Jon Paul Maloy 已提交
1655 1656
static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
		       struct sk_buff_head *xmitq)
P
Per Liden 已提交
1657
{
1658
	struct tipc_sock *tsk = tipc_sk(sk);
1659 1660 1661 1662
	struct tipc_msg *hdr = buf_msg(skb);
	unsigned int limit = rcvbuf_limit(sk, skb);
	int err = TIPC_OK;
	int usr = msg_user(hdr);
1663
	u32 onode;
P
Per Liden 已提交
1664

1665
	if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
J
Jon Paul Maloy 已提交
1666
		tipc_sk_proto_rcv(tsk, skb, xmitq);
1667
		return false;
1668
	}
1669

1670
	if (unlikely(usr == SOCK_WAKEUP)) {
1671
		onode = msg_orignode(hdr);
1672
		kfree_skb(skb);
1673 1674
		u32_del(&tsk->cong_links, onode);
		tsk->cong_link_cnt--;
1675
		sk->sk_write_space(sk);
1676
		return false;
1677 1678
	}

1679 1680 1681 1682 1683
	/* Drop if illegal message type */
	if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) {
		kfree_skb(skb);
		return false;
	}
1684

1685
	/* Reject if wrong message type for current socket state */
1686
	if (tipc_sk_type_connectionless(sk)) {
1687 1688 1689 1690 1691 1692 1693
		if (msg_connected(hdr)) {
			err = TIPC_ERR_NO_PORT;
			goto reject;
		}
	} else if (unlikely(!filter_connect(tsk, skb))) {
		err = TIPC_ERR_NO_PORT;
		goto reject;
P
Per Liden 已提交
1694 1695 1696
	}

	/* Reject message if there isn't room to queue it */
1697 1698 1699 1700
	if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) {
		err = TIPC_ERR_OVERLOAD;
		goto reject;
	}
P
Per Liden 已提交
1701

1702
	/* Enqueue message */
1703
	TIPC_SKB_CB(skb)->bytes_read = 0;
1704 1705
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	skb_set_owner_r(skb, sk);
1706

1707
	sk->sk_data_ready(sk);
1708 1709 1710
	return true;

reject:
J
Jon Paul Maloy 已提交
1711 1712
	if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
		__skb_queue_tail(xmitq, skb);
1713
	return false;
1714
}
P
Per Liden 已提交
1715

1716
/**
1717
 * tipc_backlog_rcv - handle incoming message from backlog queue
1718
 * @sk: socket
1719
 * @skb: message
1720
 *
1721
 * Caller must hold socket lock
1722 1723 1724
 *
 * Returns 0
 */
1725
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
1726
{
1727
	unsigned int truesize = skb->truesize;
J
Jon Paul Maloy 已提交
1728 1729
	struct sk_buff_head xmitq;
	u32 dnode, selector;
1730

J
Jon Paul Maloy 已提交
1731 1732 1733
	__skb_queue_head_init(&xmitq);

	if (likely(filter_rcv(sk, skb, &xmitq))) {
1734
		atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
J
Jon Paul Maloy 已提交
1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745
		return 0;
	}

	if (skb_queue_empty(&xmitq))
		return 0;

	/* Send response/rejected message */
	skb = __skb_dequeue(&xmitq);
	dnode = msg_destnode(buf_msg(skb));
	selector = msg_origport(buf_msg(skb));
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
1746 1747 1748
	return 0;
}

1749
/**
1750 1751 1752 1753 1754
 * tipc_sk_enqueue - extract all buffers with destination 'dport' from
 *                   inputq and try adding them to socket or backlog queue
 * @inputq: list of incoming buffers with potentially different destinations
 * @sk: socket where the buffers should be enqueued
 * @dport: port number for the socket
1755 1756 1757
 *
 * Caller must hold socket lock
 */
1758
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
J
Jon Paul Maloy 已提交
1759
			    u32 dport, struct sk_buff_head *xmitq)
1760
{
J
Jon Paul Maloy 已提交
1761 1762
	unsigned long time_limit = jiffies + 2;
	struct sk_buff *skb;
1763 1764
	unsigned int lim;
	atomic_t *dcnt;
J
Jon Paul Maloy 已提交
1765
	u32 onode;
1766 1767

	while (skb_queue_len(inputq)) {
1768
		if (unlikely(time_after_eq(jiffies, time_limit)))
1769 1770
			return;

1771 1772
		skb = tipc_skb_dequeue(inputq, dport);
		if (unlikely(!skb))
1773 1774 1775
			return;

		/* Add message directly to receive queue if possible */
1776
		if (!sock_owned_by_user(sk)) {
J
Jon Paul Maloy 已提交
1777
			filter_rcv(sk, skb, xmitq);
1778
			continue;
1779
		}
1780 1781

		/* Try backlog, compensating for double-counted bytes */
1782
		dcnt = &tipc_sk(sk)->dupl_rcvcnt;
1783
		if (!sk->sk_backlog.len)
1784 1785 1786 1787
			atomic_set(dcnt, 0);
		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
		if (likely(!sk_add_backlog(sk, skb, lim)))
			continue;
1788 1789

		/* Overload => reject message back to sender */
J
Jon Paul Maloy 已提交
1790 1791 1792
		onode = tipc_own_addr(sock_net(sk));
		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
			__skb_queue_tail(xmitq, skb);
1793
		break;
1794
	}
1795 1796
}

1797
/**
1798 1799 1800 1801
 * tipc_sk_rcv - handle a chain of incoming buffers
 * @inputq: buffer list containing the buffers
 * Consumes all buffers in list until inputq is empty
 * Note: may be called in multiple threads referring to the same queue
1802
 */
1803
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
1804
{
J
Jon Paul Maloy 已提交
1805
	struct sk_buff_head xmitq;
1806
	u32 dnode, dport = 0;
E
Erik Hugne 已提交
1807
	int err;
1808 1809
	struct tipc_sock *tsk;
	struct sock *sk;
1810
	struct sk_buff *skb;
1811

J
Jon Paul Maloy 已提交
1812
	__skb_queue_head_init(&xmitq);
1813 1814 1815
	while (skb_queue_len(inputq)) {
		dport = tipc_skb_peek_port(inputq, dport);
		tsk = tipc_sk_lookup(net, dport);
1816

1817 1818 1819
		if (likely(tsk)) {
			sk = &tsk->sk;
			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
J
Jon Paul Maloy 已提交
1820
				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
1821 1822
				spin_unlock_bh(&sk->sk_lock.slock);
			}
J
Jon Paul Maloy 已提交
1823 1824 1825 1826 1827
			/* Send pending response/rejected messages, if any */
			while ((skb = __skb_dequeue(&xmitq))) {
				dnode = msg_destnode(buf_msg(skb));
				tipc_node_xmit_skb(net, skb, dnode, dport);
			}
1828 1829 1830
			sock_put(sk);
			continue;
		}
1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843

		/* No destination socket => dequeue skb if still there */
		skb = tipc_skb_dequeue(inputq, dport);
		if (!skb)
			return;

		/* Try secondary lookup if unresolved named message */
		err = TIPC_ERR_NO_PORT;
		if (tipc_msg_lookup_dest(net, skb, &err))
			goto xmit;

		/* Prepare for message rejection */
		if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
1844
			continue;
1845
xmit:
1846
		dnode = msg_destnode(buf_msg(skb));
1847
		tipc_node_xmit_skb(net, skb, dnode, dport);
1848
	}
P
Per Liden 已提交
1849 1850
}

Y
Ying Xue 已提交
1851 1852
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
W
WANG Cong 已提交
1853
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
Y
Ying Xue 已提交
1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865
	struct sock *sk = sock->sk;
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (!*timeo_p)
			return -ETIMEDOUT;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

W
WANG Cong 已提交
1866
		add_wait_queue(sk_sleep(sk), &wait);
1867
		done = sk_wait_event(sk, timeo_p,
W
WANG Cong 已提交
1868 1869
				     sk->sk_state != TIPC_CONNECTING, &wait);
		remove_wait_queue(sk_sleep(sk), &wait);
Y
Ying Xue 已提交
1870 1871 1872 1873
	} while (!done);
	return 0;
}

P
Per Liden 已提交
1874
/**
 * tipc_connect - establish a connection to another TIPC port
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
 * @flags: file-related flags associated with socket
 *
 * For connectionless sockets (DGRAM/RDM) this only records @dest as the
 * default destination, or clears it when @dest->family is AF_UNSPEC.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
			int destlen, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
	/* Non-blocking connect gets a zero timeout */
	long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
	int previous;
	int res = 0;

	lock_sock(sk);

	/* DGRAM/RDM connect(), just save the destaddr */
	if (tipc_sk_type_connectionless(sk)) {
		if (dst->family == AF_UNSPEC) {
			memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
		} else if (destlen != sizeof(struct sockaddr_tipc)) {
			res = -EINVAL;
		} else {
			memcpy(&tsk->peer, dest, destlen);
		}
		goto exit;
	}

	/*
	 * Reject connection attempt using multicast address
	 *
	 * Note: send_msg() validates the rest of the address fields,
	 *       so there's no need to do it here
	 */
	if (dst->addrtype == TIPC_ADDR_MCAST) {
		res = -EINVAL;
		goto exit;
	}

	/* Remember entry state: distinguishes EINPROGRESS from EALREADY below */
	previous = sk->sk_state;

	switch (sk->sk_state) {
	case TIPC_OPEN:
		/* Send a 'SYN-' to destination */
		m.msg_name = dest;
		m.msg_namelen = destlen;

		/* If connect is in non-blocking case, set MSG_DONTWAIT to
		 * indicate send_msg() is never blocked.
		 */
		if (!timeout)
			m.msg_flags = MSG_DONTWAIT;

		res = __tipc_sendmsg(sock, &m, 0);
		if ((res < 0) && (res != -EWOULDBLOCK))
			goto exit;

		/* Just entered TIPC_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		res = -EINPROGRESS;
		/* fall thru' */
	case TIPC_CONNECTING:
		if (!timeout) {
			if (previous == TIPC_CONNECTING)
				res = -EALREADY;
			goto exit;
		}
		timeout = msecs_to_jiffies(timeout);
		/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
		res = tipc_wait_for_connect(sock, &timeout);
		break;
	case TIPC_ESTABLISHED:
		res = -EISCONN;
		break;
	default:
		res = -EINVAL;
	}

exit:
	release_sock(sk);
	return res;
}

1965
/**
1966
 * tipc_listen - allow socket to listen for incoming connections
P
Per Liden 已提交
1967 1968
 * @sock: socket structure
 * @len: (unused)
1969
 *
P
Per Liden 已提交
1970 1971
 * Returns 0 on success, errno otherwise
 */
1972
static int tipc_listen(struct socket *sock, int len)
P
Per Liden 已提交
1973
{
1974 1975 1976 1977
	struct sock *sk = sock->sk;
	int res;

	lock_sock(sk);
1978
	res = tipc_set_sk_state(sk, TIPC_LISTEN);
1979
	release_sock(sk);
1980

1981
	return res;
P
Per Liden 已提交
1982 1983
}

Y
Ying Xue 已提交
1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997
/* tipc_wait_for_accept - block until a connection request is queued
 * @sock: listening socket (socket lock held by caller)
 * @timeo: maximum time to sleep, in jiffies
 *
 * Returns 0 when the receive queue is non-empty, -EAGAIN when @timeo
 * expires, or the signal errno if interrupted.
 */
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int err;

	/* True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	*/
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			/* Drop the socket lock while sleeping */
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

2017
/**
 * tipc_accept - wait for connection request
 * @sock: listening socket
 * @newsock: new socket that is to be connected
 * @flags: file-related flags associated with socket
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
{
	struct sock *new_sk, *sk = sock->sk;
	struct sk_buff *buf;
	struct tipc_sock *new_tsock;
	struct tipc_msg *msg;
	long timeo;
	int res;

	lock_sock(sk);

	if (sk->sk_state != TIPC_LISTEN) {
		res = -EINVAL;
		goto exit;
	}
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	res = tipc_wait_for_accept(sock, timeo);
	if (res)
		goto exit;

	/* Peek only: the SYN stays queued until explicitly consumed below */
	buf = skb_peek(&sk->sk_receive_queue);

	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
	if (res)
		goto exit;
	security_sk_clone(sock->sk, new_sock->sk);

	new_sk = new_sock->sk;
	new_tsock = tipc_sk(new_sk);
	msg = buf_msg(buf);

	/* we lock on new_sk; but lockdep sees the lock on sk */
	lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);

	/*
	 * Reject any stray messages received by new socket
	 * before the socket lock was taken (very, very unlikely)
	 */
	tsk_rej_rx_queue(new_sk);

	/* Connect new socket to its peer */
	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));

	/* Inherit importance and (if named) connection type/instance from SYN */
	tsk_set_importance(new_tsock, msg_importance(msg));
	if (msg_named(msg)) {
		new_tsock->conn_type = msg_nametype(msg);
		new_tsock->conn_instance = msg_nameinst(msg);
	}

	/*
	 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
	 * Respond to 'SYN+' by queuing it on new socket.
	 */
	if (!msg_data_sz(msg)) {
		struct msghdr m = {NULL,};

		tsk_advance_rx_queue(sk);
		__tipc_sendstream(new_sock, &m, 0);
	} else {
		/* Move the data-carrying SYN over to the new socket */
		__skb_dequeue(&sk->sk_receive_queue);
		__skb_queue_head(&new_sk->sk_receive_queue, buf);
		skb_set_owner_r(buf, new_sk);
	}
	release_sock(new_sk);
exit:
	release_sock(sk);
	return res;
}

/**
 * tipc_shutdown - shutdown socket connection
 * @sock: socket structure
 * @how: direction to close (must be SHUT_RDWR)
 *
 * Terminates connection (if necessary), then purges socket's receive queue.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	int res;

	/* Only full-duplex shutdown is supported */
	if (how != SHUT_RDWR)
		return -EINVAL;

	lock_sock(sk);

	__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
	sk->sk_shutdown = SEND_SHUTDOWN;

	if (sk->sk_state == TIPC_DISCONNECTING) {
		/* Discard any unreceived messages */
		__skb_queue_purge(&sk->sk_receive_queue);

		/* Wake up anyone sleeping in poll */
		sk->sk_state_change(sk);
		res = 0;
	} else {
		res = -ENOTCONN;
	}

	release_sock(sk);
	return res;
}

2131
static void tipc_sk_timeout(unsigned long data)
2132
{
2133 2134
	struct tipc_sock *tsk = (struct tipc_sock *)data;
	struct sock *sk = &tsk->sk;
2135
	struct sk_buff *skb = NULL;
2136
	u32 peer_port, peer_node;
2137
	u32 own_node = tsk_own_node(tsk);
2138

J
Jon Paul Maloy 已提交
2139
	bh_lock_sock(sk);
2140
	if (!tipc_sk_connected(sk)) {
J
Jon Paul Maloy 已提交
2141 2142
		bh_unlock_sock(sk);
		goto exit;
2143
	}
2144 2145
	peer_port = tsk_peer_port(tsk);
	peer_node = tsk_peer_node(tsk);
2146

2147
	if (tsk->probe_unacked) {
2148
		if (!sock_owned_by_user(sk)) {
2149
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
2150 2151 2152 2153 2154 2155 2156 2157
			tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
					      tsk_peer_port(tsk));
			sk->sk_state_change(sk);
		} else {
			/* Try again later */
			sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
		}

2158 2159
		bh_unlock_sock(sk);
		goto exit;
2160
	}
2161 2162 2163 2164

	skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
			      INT_H_SIZE, 0, peer_node, own_node,
			      peer_port, tsk->portid, TIPC_OK);
2165
	tsk->probe_unacked = true;
2166
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
2167
	bh_unlock_sock(sk);
2168
	if (skb)
2169
		tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
J
Jon Paul Maloy 已提交
2170
exit:
2171
	sock_put(sk);
2172 2173
}

2174
/* tipc_sk_publish - bind a name sequence to this socket
 * @tsk: socket to publish from
 * @scope: publication scope
 * @seq: name sequence (type/lower/upper) to publish
 *
 * Not allowed on connected sockets.  The publication key is derived from
 * the port id plus a per-socket counter; a wrap back onto the port id
 * itself is rejected with -EADDRINUSE.
 *
 * Returns 0 on success, a negative errno otherwise.
 */
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq const *seq)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct publication *publ;
	u32 key;

	if (tipc_sk_connected(sk))
		return -EINVAL;
	key = tsk->portid + tsk->pub_count + 1;
	if (key == tsk->portid)
		return -EADDRINUSE;

	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
				    scope, tsk->portid, key);
	if (unlikely(!publ))
		return -EINVAL;

	/* Track the publication on the socket for later withdrawal */
	list_add(&publ->pport_list, &tsk->publications);
	tsk->pub_count++;
	tsk->published = 1;
	return 0;
}

2199
/* tipc_sk_withdraw - withdraw name publication(s) from this socket
 * @tsk: socket owning the publications
 * @scope: publication scope to match (only used when @seq is non-NULL)
 * @seq: exact name sequence to withdraw, or NULL to withdraw all
 *
 * With @seq set, withdraws at most the single exactly-matching
 * publication; with @seq NULL, withdraws every publication on the socket.
 *
 * Returns 0 if anything was withdrawn, -EINVAL otherwise.
 */
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq const *seq)
{
	struct net *net = sock_net(&tsk->sk);
	struct publication *publ;
	struct publication *safe;
	int rc = -EINVAL;

	list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
		if (seq) {
			if (publ->scope != scope)
				continue;
			if (publ->type != seq->type)
				continue;
			if (publ->lower != seq->lower)
				continue;
			/* type+lower matched but upper did not: give up */
			if (publ->upper != seq->upper)
				break;
			tipc_nametbl_withdraw(net, publ->type, publ->lower,
					      publ->ref, publ->key);
			rc = 0;
			break;
		}
		/* seq == NULL: withdraw everything */
		tipc_nametbl_withdraw(net, publ->type, publ->lower,
				      publ->ref, publ->key);
		rc = 0;
	}
	if (list_empty(&tsk->publications))
		tsk->published = 0;
	return rc;
}

2231 2232 2233
/* tipc_sk_reinit: set non-zero address in all existing sockets
 *                 when we go from standalone to network mode.
 *
 * Walks the whole socket hash table under RCU and rewrites the cached
 * message-header origin/prev node fields of every socket, taking each
 * socket's spinlock around the update.
 */
void tipc_sk_reinit(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	const struct bucket_table *tbl;
	struct rhash_head *pos;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;
	int i;

	rcu_read_lock();
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
	for (i = 0; i < tbl->size; i++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, i, node) {
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			msg = &tsk->phdr;
			msg_set_prevnode(msg, tn->own_addr);
			msg_set_orignode(msg, tn->own_addr);
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
	}
	rcu_read_unlock();
}

2257
/* tipc_sk_lookup - find a socket by port id
 * @net: network namespace to search in
 * @portid: 32-bit TIPC port id (hash key)
 *
 * Returns the socket with a reference held (sock_hold), or NULL if not
 * found.  The caller must drop the reference with sock_put().
 */
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_sock *tsk;

	rcu_read_lock();
	tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params);
	if (tsk)
		sock_hold(&tsk->sk);
	rcu_read_unlock();

	return tsk;
}

2271
/* tipc_sk_insert - assign a free port id and insert socket into hash table
 * @tsk: socket to insert
 *
 * Starts from a random port in [TIPC_MIN_PORT, TIPC_MAX_PORT] and probes
 * linearly (with wrap-around) until an unused id is found.  The table
 * holds its own socket reference for as long as the entry exists.
 *
 * Returns 0 on success, -1 if the whole port range is exhausted.
 */
static int tipc_sk_insert(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
	u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;

	while (remaining--) {
		portid++;
		if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT))
			portid = TIPC_MIN_PORT;
		tsk->portid = portid;
		sock_hold(&tsk->sk);
		if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
						   tsk_rht_params))
			return 0;
		/* Port already taken: drop the ref and try the next one */
		sock_put(&tsk->sk);
	}

	return -1;
}

2294
/* tipc_sk_remove - remove socket from the port-id hash table
 * @tsk: socket to remove
 *
 * Drops the reference the table held; the table must never be holding
 * the last reference, hence the WARN_ON.
 */
static void tipc_sk_remove(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);

	if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
}

2305 2306 2307 2308 2309 2310 2311
/* Parameters of the per-netns socket hash table: keyed by 32-bit port id */
static const struct rhashtable_params tsk_rht_params = {
	.nelem_hint = 192,
	.head_offset = offsetof(struct tipc_sock, node),
	.key_offset = offsetof(struct tipc_sock, portid),
	.key_len = sizeof(u32), /* portid */
	.max_size = 1048576,
	.min_size = 256,
	.automatic_shrinking = true,
};

2315
int tipc_sk_rht_init(struct net *net)
2316
{
2317
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2318 2319

	return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
2320 2321
}

2322
/* tipc_sk_rht_destroy - tear down this namespace's socket hash table */
void tipc_sk_rht_destroy(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	/* Wait for socket readers to complete */
	synchronize_net();

	rhashtable_destroy(&tn->sk_rht);
}

P
Per Liden 已提交
2332
/**
2333
 * tipc_setsockopt - set socket option
P
Per Liden 已提交
2334 2335 2336 2337 2338
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: pointer to new option value
 * @ol: length of option value
2339 2340
 *
 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
P
Per Liden 已提交
2341
 * (to ease compatibility).
2342
 *
P
Per Liden 已提交
2343 2344
 * Returns 0 on success, errno otherwise
 */
2345 2346
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, unsigned int ol)
P
Per Liden 已提交
2347
{
2348
	struct sock *sk = sock->sk;
2349
	struct tipc_sock *tsk = tipc_sk(sk);
2350
	u32 value = 0;
P
Per Liden 已提交
2351 2352
	int res;

2353 2354
	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return 0;
P
Per Liden 已提交
2355 2356
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372

	switch (opt) {
	case TIPC_IMPORTANCE:
	case TIPC_SRC_DROPPABLE:
	case TIPC_DEST_DROPPABLE:
	case TIPC_CONN_TIMEOUT:
		if (ol < sizeof(value))
			return -EINVAL;
		res = get_user(value, (u32 __user *)ov);
		if (res)
			return res;
		break;
	default:
		if (ov || ol)
			return -EINVAL;
	}
P
Per Liden 已提交
2373

2374
	lock_sock(sk);
2375

P
Per Liden 已提交
2376 2377
	switch (opt) {
	case TIPC_IMPORTANCE:
2378
		res = tsk_set_importance(tsk, value);
P
Per Liden 已提交
2379 2380 2381
		break;
	case TIPC_SRC_DROPPABLE:
		if (sock->type != SOCK_STREAM)
2382
			tsk_set_unreliable(tsk, value);
2383
		else
P
Per Liden 已提交
2384 2385 2386
			res = -ENOPROTOOPT;
		break;
	case TIPC_DEST_DROPPABLE:
2387
		tsk_set_unreturnable(tsk, value);
P
Per Liden 已提交
2388 2389
		break;
	case TIPC_CONN_TIMEOUT:
2390
		tipc_sk(sk)->conn_timeout = value;
2391
		/* no need to set "res", since already 0 at this point */
P
Per Liden 已提交
2392
		break;
2393 2394 2395 2396 2397 2398 2399 2400
	case TIPC_MCAST_BROADCAST:
		tsk->mc_method.rcast = false;
		tsk->mc_method.mandatory = true;
		break;
	case TIPC_MCAST_REPLICAST:
		tsk->mc_method.rcast = true;
		tsk->mc_method.mandatory = true;
		break;
P
Per Liden 已提交
2401 2402 2403 2404
	default:
		res = -EINVAL;
	}

2405 2406
	release_sock(sk);

P
Per Liden 已提交
2407 2408 2409 2410
	return res;
}

/**
 * tipc_getsockopt - get socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: receptacle for option value
 * @ol: receptacle for length of option value
 *
 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, int __user *ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	int len;
	u32 value;
	int res;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return put_user(0, ol);
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
	res = get_user(len, ol);
	if (res)
		return res;

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		value = tsk_importance(tsk);
		break;
	case TIPC_SRC_DROPPABLE:
		value = tsk_unreliable(tsk);
		break;
	case TIPC_DEST_DROPPABLE:
		value = tsk_unreturnable(tsk);
		break;
	case TIPC_CONN_TIMEOUT:
		value = tsk->conn_timeout;
		/* no need to set "res", since already 0 at this point */
		break;
	case TIPC_NODE_RECVQ_DEPTH:
		value = 0; /* was tipc_queue_size, now obsolete */
		break;
	case TIPC_SOCK_RECVQ_DEPTH:
		value = skb_queue_len(&sk->sk_receive_queue);
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	if (res)
		return res;	/* "get" failed */

	/* Copy the value out only if the user buffer is big enough */
	if (len < sizeof(value))
		return -EINVAL;

	if (copy_to_user(ov, &value, sizeof(value)))
		return -EFAULT;

	return put_user(sizeof(value), ol);
}

2480
/* tipc_ioctl - handle TIPC-specific ioctls
 * @sock: socket issuing the ioctl
 * @cmd: ioctl command (only SIOCGETLINKNAME is supported)
 * @arg: user pointer to a struct tipc_sioc_ln_req
 *
 * SIOCGETLINKNAME resolves (bearer id, peer node) to a link name and
 * copies the filled request back to user space.
 */
static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	struct tipc_sioc_ln_req lnr;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case SIOCGETLINKNAME:
		if (copy_from_user(&lnr, argp, sizeof(lnr)))
			return -EFAULT;
		if (!tipc_node_get_linkname(sock_net(sk),
					    lnr.bearer_id & 0xffff, lnr.peer,
					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
			if (copy_to_user(argp, &lnr, sizeof(lnr)))
				return -EFAULT;
			return 0;
		}
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

2503 2504
/* Protocol switches for the various types of TIPC sockets */

/* Connectionless message sockets: no listen/accept support
 * (presumably used for SOCK_RDM/SOCK_DGRAM; the mapping is done at
 * socket creation time, outside this chunk — confirm there)
 */
static const struct proto_ops msg_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendmsg,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

2526
/* Connection-oriented packet sockets: listen/accept enabled,
 * message-boundary-preserving send (tipc_send_packet)
 */
static const struct proto_ops packet_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_packet,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

2547
/* Byte-stream sockets: same as packet_ops except stream send/receive */
static const struct proto_ops stream_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendstream,
	.recvmsg	= tipc_recv_stream,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

2568
/* AF_TIPC address-family registration: entry point for socket(2) */
static const struct net_proto_family tipc_family_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.create		= tipc_sk_create
};

/* Core protocol descriptor: sizes struct tipc_sock allocations and
 * supplies the receive-buffer sysctl limits
 */
static struct proto tipc_proto = {
	.name		= "TIPC",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct tipc_sock),
	.sysctl_rmem	= sysctl_tipc_rmem
};

/**
 * tipc_socket_init - initialize TIPC socket interface
 *
 * Registers the TIPC proto and the AF_TIPC socket family; unwinds the
 * proto registration if family registration fails.
 *
 * Returns 0 on success, errno otherwise
 */
int tipc_socket_init(void)
{
	int res;

	res = proto_register(&tipc_proto, 1);
	if (res) {
		pr_err("Failed to register TIPC protocol type\n");
		goto out;
	}

	res = sock_register(&tipc_family_ops);
	if (res) {
		pr_err("Failed to register TIPC socket type\n");
		proto_unregister(&tipc_proto);
		goto out;
	}
 out:
	return res;
}

/**
 * tipc_socket_stop - stop TIPC socket interface
 *
 * Reverses tipc_socket_init(): unregisters the socket family, then the
 * protocol.
 */
void tipc_socket_stop(void)
{
	sock_unregister(tipc_family_ops.family);
	proto_unregister(&tipc_proto);
}
2614 2615

/* Caller should hold socket lock for the passed tipc socket. */
/* Nest the connection attributes (peer node/port, optional type/instance)
 * of a connected socket into a netlink dump message.
 * Returns 0 on success or -EMSGSIZE if the skb is full.
 */
static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
{
	u32 peer_node;
	u32 peer_port;
	struct nlattr *nest;

	peer_node = tsk_peer_node(tsk);
	peer_port = tsk_peer_port(tsk);

	nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);

	if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
		goto msg_full;
	if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
		goto msg_full;

	/* Connection type/instance only exist for name-addressed connects */
	if (tsk->conn_type != 0) {
		if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
			goto msg_full;
	}
	nla_nest_end(skb, nest);

	return 0;

msg_full:
	nla_nest_cancel(skb, nest);

	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
/* Emit one complete TIPC_NL_SOCK_GET record (port ref, own address, and
 * either connection info or a has-publications flag) for a single socket.
 * Returns 0 on success or -EMSGSIZE, unwinding any partial message.
 */
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tipc_sock *tsk)
{
	int err;
	void *hdr;
	struct nlattr *attrs;
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct sock *sk = &tsk->sk;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto genlmsg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
		goto attr_msg_cancel;

	if (tipc_sk_connected(sk)) {
		err = __tipc_nl_add_sk_con(skb, tsk);
		if (err)
			goto attr_msg_cancel;
	} else if (!list_empty(&tsk->publications)) {
		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
			goto attr_msg_cancel;
	}
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

/* tipc_nl_sk_dump - netlink dump callback listing all TIPC sockets
 *
 * Walks the socket hash table under RCU, serializing one record per
 * socket.  Resume state across dump invocations lives in cb->args[]:
 * args[0] = current bucket, args[1] = port id of the socket whose record
 * did not fit last time (0 when none).
 */
int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	struct tipc_sock *tsk;
	const struct bucket_table *tbl;
	struct rhash_head *pos;
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 tbl_id = cb->args[0];
	u32 prev_portid = cb->args[1];

	rcu_read_lock();
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
	for (; tbl_id < tbl->size; tbl_id++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) {
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			/* Skip forward to the socket we stopped at last time */
			if (prev_portid && prev_portid != tsk->portid) {
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				continue;
			}

			err = __tipc_nl_add_sk(skb, cb, tsk);
			if (err) {
				/* skb full: remember where to resume */
				prev_portid = tsk->portid;
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				goto out;
			}
			prev_portid = 0;
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
	}
out:
	rcu_read_unlock();
	cb->args[0] = tbl_id;
	cb->args[1] = prev_portid;

	return skb->len;
}
2733 2734

/* Caller should hold socket lock for the passed tipc socket. */
/* Emit one TIPC_NL_PUBL_GET record (key/type/lower/upper) for a single
 * publication.  Returns 0 on success or -EMSGSIZE, unwinding on failure.
 */
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 struct publication *publ)
{
	void *hdr;
	struct nlattr *attrs;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET);
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_PUBL);
	if (!attrs)
		goto genlmsg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
/* Serialize the socket's publications starting after *last_publ (a
 * publication key; 0 means start from the beginning).  On a full skb the
 * key to resume from is stored back into *last_publ.
 */
static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
				  struct netlink_callback *cb,
				  struct tipc_sock *tsk, u32 *last_publ)
{
	int err;
	struct publication *p;

	if (*last_publ) {
		/* Re-locate the publication we stopped at last time */
		list_for_each_entry(p, &tsk->publications, pport_list) {
			if (p->key == *last_publ)
				break;
		}
		if (p->key != *last_publ) {
			/* We never set seq or call nl_dump_check_consistent()
			 * this means that setting prev_seq here will cause the
			 * consistence check to fail in the netlink callback
			 * handler. Resulting in the last NLMSG_DONE message
			 * having the NLM_F_DUMP_INTR flag set.
			 */
			cb->prev_seq = 1;
			*last_publ = 0;
			return -EPIPE;
		}
	} else {
		p = list_first_entry(&tsk->publications, struct publication,
				     pport_list);
	}

	list_for_each_entry_from(p, &tsk->publications, pport_list) {
		err = __tipc_nl_add_sk_publ(skb, cb, p);
		if (err) {
			*last_publ = p->key;
			return err;
		}
	}
	*last_publ = 0;

	return 0;
}

/* tipc_nl_publ_dump - netlink dump callback listing one socket's publications
 *
 * On the first invocation the target socket's port id is parsed from the
 * request's TIPC_NLA_SOCK_REF attribute.  Resume state in cb->args[]:
 * args[0] = port id, args[1] = last publication key, args[2] = done flag.
 */
int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	u32 tsk_portid = cb->args[0];
	u32 last_publ = cb->args[1];
	u32 done = cb->args[2];
	struct net *net = sock_net(skb->sk);
	struct tipc_sock *tsk;

	if (!tsk_portid) {
		struct nlattr **attrs;
		struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];

		err = tipc_nlmsg_parse(cb->nlh, &attrs);
		if (err)
			return err;

		if (!attrs[TIPC_NLA_SOCK])
			return -EINVAL;

		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
				       attrs[TIPC_NLA_SOCK],
				       tipc_nl_sock_policy);
		if (err)
			return err;

		if (!sock[TIPC_NLA_SOCK_REF])
			return -EINVAL;

		tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]);
	}

	if (done)
		return 0;

	/* tipc_sk_lookup() returns with a reference held */
	tsk = tipc_sk_lookup(net, tsk_portid);
	if (!tsk)
		return -EINVAL;

	lock_sock(&tsk->sk);
	err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ);
	if (!err)
		done = 1;
	release_sock(&tsk->sk);
	sock_put(&tsk->sk);

	cb->args[0] = tsk_portid;
	cb->args[1] = last_publ;
	cb->args[2] = done;

	return skb->len;
}