socket.c 70.9 KB
Newer Older
P
Per Liden 已提交
1
/*
2
 * net/tipc/socket.c: TIPC socket API
3
 *
4
 * Copyright (c) 2001-2007, 2012-2016, Ericsson AB
5
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
P
Per Liden 已提交
6 7
 * All rights reserved.
 *
P
Per Liden 已提交
8
 * Redistribution and use in source and binary forms, with or without
P
Per Liden 已提交
9 10
 * modification, are permitted provided that the following conditions are met:
 *
P
Per Liden 已提交
11 12 13 14 15 16 17 18
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
P
Per Liden 已提交
19
 *
P
Per Liden 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
P
Per Liden 已提交
34 35 36
 * POSSIBILITY OF SUCH DAMAGE.
 */

37
#include <linux/rhashtable.h>
P
Per Liden 已提交
38
#include "core.h"
39
#include "name_table.h"
E
Erik Hugne 已提交
40
#include "node.h"
41
#include "link.h"
42
#include "name_distr.h"
43
#include "socket.h"
44
#include "bcast.h"
45
#include "netlink.h"
46

47
#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
48
#define CONN_PROBING_INTERVAL	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
49 50 51
#define TIPC_FWD_MSG		1
#define TIPC_MAX_PORT		0xffffffff
#define TIPC_MIN_PORT		1
52

53 54
/* TIPC socket states, mapped onto TCP state values so that generic
 * socket-layer consumers of sk->sk_state see familiar numbers.
 */
enum {
	TIPC_LISTEN = TCP_LISTEN,
	TIPC_ESTABLISHED = TCP_ESTABLISHED,
	TIPC_OPEN = TCP_CLOSE,			/* unbound / unconnected */
	TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
	TIPC_CONNECTING = TCP_SYN_SENT,
};

61 62 63 64 65 66 67
/**
 * struct tipc_sock - TIPC socket structure
 * @sk: socket - interacts with 'port' and with user via the socket API
 * @conn_type: TIPC type used when connection was established
 * @conn_instance: TIPC instance used when connection was established
 * @published: non-zero if port has one or more associated names
 * @max_pkt: maximum packet size "hint" used when building messages sent by port
68
 * @portid: unique port identity in TIPC socket hash table
69
 * @phdr: preformatted message header used when sending messages
70
 * @cong_links: list of congested links
71
 * @publications: list of publications for port
72
 * @blocking_link: address of the congested link we are currently sleeping on
73 74 75 76
 * @pub_count: total # of publications port has made during its lifetime
 * @probing_state:
 * @conn_timeout: the time we can wait for an unresponded setup request
 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
77
 * @cong_link_cnt: number of congested links
78 79
 * @sent_unacked: # messages sent by socket, and not yet acked by peer
 * @rcv_unacked: # messages read by user, but not yet acked back to peer
80
 * @peer: 'connected' peer for dgram/rdm
81 82
 * @node: hash table node
 * @rcu: rcu struct for tipc_sock
83 84 85 86 87 88 89
 */
struct tipc_sock {
	struct sock sk;			/* must be first; enables container_of() in tipc_sk() */
	u32 conn_type;			/* TIPC type used when connection was established */
	u32 conn_instance;		/* TIPC instance used when connection was established */
	int published;			/* non-zero if port has one or more associated names */
	u32 max_pkt;			/* maximum packet size "hint" for outgoing messages */
	u32 portid;			/* unique port identity in TIPC socket hash table */
	struct tipc_msg phdr;		/* preformatted message header used when sending */
	struct list_head cong_links;	/* list of congested links */
	struct list_head publications;	/* list of publications for port */
	u32 pub_count;			/* total # of publications made during lifetime */
	uint conn_timeout;		/* max wait for an unresponded setup request */
	atomic_t dupl_rcvcnt;		/* bytes counted twice: backlog and rcv queue */
	bool probe_unacked;		/* a connection probe is awaiting a reply */
	u16 cong_link_cnt;		/* number of congested links */
	u16 snt_unacked;		/* # sent and not yet acked by peer */
	u16 snd_win;			/* send window advertised by peer */
	u16 peer_caps;			/* peer capability bits, e.g. TIPC_BLOCK_FLOWCTL */
	u16 rcv_unacked;		/* # read by user, not yet acked back to peer */
	u16 rcv_win;			/* receive window advertised to peer */
	struct sockaddr_tipc peer;	/* 'connected' peer for dgram/rdm */
	struct rhash_head node;		/* hash table node */
	struct rcu_head rcu;		/* rcu struct for deferred freeing */
};
P
Per Liden 已提交
108

109
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
110
static void tipc_data_ready(struct sock *sk);
111
static void tipc_write_space(struct sock *sk);
112
static void tipc_sock_destruct(struct sock *sk);
113 114
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
115
static void tipc_sk_timeout(unsigned long data);
116
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
117
			   struct tipc_name_seq const *seq);
118
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
119
			    struct tipc_name_seq const *seq);
120
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
121 122
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
123
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
124
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
P
Per Liden 已提交
125

126 127 128
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
P
Per Liden 已提交
129
static struct proto tipc_proto;
130 131
static const struct rhashtable_params tsk_rht_params;

132 133 134 135 136
/* tsk_own_node - return own node address cached in the message header */
static u32 tsk_own_node(struct tipc_sock *tsk)
{
	struct tipc_msg *hdr = &tsk->phdr;

	return msg_prevnode(hdr);
}

137
static u32 tsk_peer_node(struct tipc_sock *tsk)
138
{
139
	return msg_destnode(&tsk->phdr);
140 141
}

142
static u32 tsk_peer_port(struct tipc_sock *tsk)
143
{
144
	return msg_destport(&tsk->phdr);
145 146
}

147
static  bool tsk_unreliable(struct tipc_sock *tsk)
148
{
149
	return msg_src_droppable(&tsk->phdr) != 0;
150 151
}

152
/* tsk_set_unreliable - mark messages from this socket as droppable or not */
static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
{
	struct tipc_msg *hdr = &tsk->phdr;

	msg_set_src_droppable(hdr, unreliable ? 1 : 0);
}

157
static bool tsk_unreturnable(struct tipc_sock *tsk)
158
{
159
	return msg_dest_droppable(&tsk->phdr) != 0;
160 161
}

162
/* tsk_set_unreturnable - control whether undeliverable messages are returned */
static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
{
	struct tipc_msg *hdr = &tsk->phdr;

	msg_set_dest_droppable(hdr, unreturnable ? 1 : 0);
}

167
static int tsk_importance(struct tipc_sock *tsk)
168
{
169
	return msg_importance(&tsk->phdr);
170 171
}

172
static int tsk_set_importance(struct tipc_sock *tsk, int imp)
173 174 175
{
	if (imp > TIPC_CRITICAL_IMPORTANCE)
		return -EINVAL;
176
	msg_set_importance(&tsk->phdr, (u32)imp);
177 178
	return 0;
}
179

180 181 182 183 184
/* tipc_sk - cast a generic sock to its enclosing tipc_sock */
static struct tipc_sock *tipc_sk(const struct sock *sk)
{
	return container_of(sk, struct tipc_sock, sk);
}

185
static bool tsk_conn_cong(struct tipc_sock *tsk)
186
{
187
	return tsk->snt_unacked > tsk->snd_win;
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
}

/* tsk_adv_blocks(): translate a buffer size in bytes to number of
 * advertisable blocks, taking into account the ratio truesize(len)/len
 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
 */
static u16 tsk_adv_blocks(int len)
{
	int blks = len / FLOWCTL_BLK_SZ;

	return blks / 4;
}

/* tsk_inc(): increment counter for sent or received data
 * - If block based flow control is not supported by peer we
 *   fall back to message based ditto, incrementing the counter
 */
static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
{
	if (unlikely(!(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)))
		return 1;

	return msglen / FLOWCTL_BLK_SZ + 1;
}

210
/**
211
 * tsk_advance_rx_queue - discard first buffer in socket receive queue
212 213
 *
 * Caller must hold socket lock
P
Per Liden 已提交
214
 */
215
static void tsk_advance_rx_queue(struct sock *sk)
P
Per Liden 已提交
216
{
217
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
P
Per Liden 已提交
218 219
}

220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
/* tipc_sk_respond() : send response message back to sender
 */
/* tipc_sk_respond() : send response message back to sender */
static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
{
	u32 onode = tipc_own_addr(sock_net(sk));
	struct tipc_msg *hdr;
	u32 dnode, selector;

	if (!tipc_msg_reverse(onode, &skb, err))
		return;

	hdr = buf_msg(skb);
	dnode = msg_destnode(hdr);
	selector = msg_origport(hdr);
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
}

P
Per Liden 已提交
236
/**
237
 * tsk_rej_rx_queue - reject all buffers in socket receive queue
238 239
 *
 * Caller must hold socket lock
P
Per Liden 已提交
240
 */
241
static void tsk_rej_rx_queue(struct sock *sk)
P
Per Liden 已提交
242
{
243
	struct sk_buff *skb;
244

245 246
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
		tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
P
Per Liden 已提交
247 248
}

249 250
static bool tipc_sk_connected(struct sock *sk)
{
251
	return sk->sk_state == TIPC_ESTABLISHED;
252 253
}

254 255 256 257 258 259 260 261 262 263
/* tipc_sk_type_connectionless - check if the socket is datagram socket
 * @sk: socket
 *
 * Returns true if connection less, false otherwise
 */
/* tipc_sk_type_connectionless - check if the socket is datagram socket
 * @sk: socket
 *
 * Returns true if connection less, false otherwise
 */
static bool tipc_sk_type_connectionless(struct sock *sk)
{
	int typ = sk->sk_type;

	return typ == SOCK_RDM || typ == SOCK_DGRAM;
}

264
/* tsk_peer_msg - verify if message was sent by connected port's peer
J
Jon Paul Maloy 已提交
265 266 267 268
 *
 * Handles cases where the node's network address has changed from
 * the default of <0.0.0> to its configured setting.
 */
269
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
J
Jon Paul Maloy 已提交
270
{
271 272
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
273
	u32 peer_port = tsk_peer_port(tsk);
J
Jon Paul Maloy 已提交
274 275 276
	u32 orig_node;
	u32 peer_node;

277
	if (unlikely(!tipc_sk_connected(sk)))
J
Jon Paul Maloy 已提交
278 279 280 281 282 283
		return false;

	if (unlikely(msg_origport(msg) != peer_port))
		return false;

	orig_node = msg_orignode(msg);
284
	peer_node = tsk_peer_node(tsk);
J
Jon Paul Maloy 已提交
285 286 287 288

	if (likely(orig_node == peer_node))
		return true;

289
	if (!orig_node && (peer_node == tn->own_addr))
J
Jon Paul Maloy 已提交
290 291
		return true;

292
	if (!peer_node && (orig_node == tn->own_addr))
J
Jon Paul Maloy 已提交
293 294 295 296 297
		return true;

	return false;
}

298 299 300 301 302 303 304 305 306
/* tipc_set_sk_state - set the sk_state of the socket
 * @sk: socket
 *
 * Caller must hold socket lock
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_set_sk_state(struct sock *sk, int state)
{
307
	int oldsk_state = sk->sk_state;
308 309 310
	int res = -EINVAL;

	switch (state) {
311 312 313
	case TIPC_OPEN:
		res = 0;
		break;
314
	case TIPC_LISTEN:
315
	case TIPC_CONNECTING:
316
		if (oldsk_state == TIPC_OPEN)
317 318
			res = 0;
		break;
319
	case TIPC_ESTABLISHED:
320
		if (oldsk_state == TIPC_CONNECTING ||
321
		    oldsk_state == TIPC_OPEN)
322 323
			res = 0;
		break;
324
	case TIPC_DISCONNECTING:
325
		if (oldsk_state == TIPC_CONNECTING ||
326 327 328
		    oldsk_state == TIPC_ESTABLISHED)
			res = 0;
		break;
329 330 331 332 333 334 335 336
	}

	if (!res)
		sk->sk_state = state;

	return res;
}

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
/* tipc_sk_sock_err - check for reasons to abort a blocking send/connect wait
 *
 * Returns 0 if waiting may continue, otherwise a negative errno.
 */
static int tipc_sk_sock_err(struct socket *sock, long *timeout)
{
	struct sock *sk = sock->sk;
	int err = sock_error(sk);

	if (err)
		return err;

	/* Connection-oriented sockets must still have a live connection */
	if (sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET) {
		if (sk->sk_state == TIPC_DISCONNECTING)
			return -EPIPE;
		if (!tipc_sk_connected(sk))
			return -ENOTCONN;
	}
	if (!*timeout)
		return -EAGAIN;
	if (signal_pending(current))
		return sock_intr_errno(*timeout);

	return 0;
}

/* tipc_wait_for_cond - wait (interruptibly) until @condition_ holds or
 * tipc_sk_sock_err() reports an error/timeout. Evaluates to 0 or a
 * negative errno.
 *
 * Fix: the original expansion referenced `sock->sk` instead of
 * `(sock_)->sk`, so the macro silently depended on every caller naming
 * its argument `sock` (macro hygiene bug). All arguments are now
 * parenthesized at each use.
 */
#define tipc_wait_for_cond(sock_, timeout_, condition_)			\
({								        \
	int rc_ = 0;							\
	int done_ = 0;							\
									\
	while (!(condition_) && !done_) {				\
		struct sock *sk_ = (sock_)->sk;				\
		DEFINE_WAIT_FUNC(wait_, woken_wake_function);		\
									\
		rc_ = tipc_sk_sock_err((sock_), (timeout_));		\
		if (rc_)						\
			break;						\
		prepare_to_wait(sk_sleep(sk_), &wait_,			\
				TASK_INTERRUPTIBLE);			\
		done_ = sk_wait_event(sk_, (timeout_),			\
				      (condition_), &wait_);		\
		remove_wait_queue(sk_sleep(sk_), &wait_);		\
	}								\
	rc_;								\
})

P
Per Liden 已提交
380
/**
381
 * tipc_sk_create - create a TIPC socket
382
 * @net: network namespace (must be default network)
P
Per Liden 已提交
383 384
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
385
 * @kern: caused by kernel or by userspace?
386
 *
387 388
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
P
Per Liden 已提交
389 390 391
 *
 * Returns 0 on success, errno otherwise
 */
392 393
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
P
Per Liden 已提交
394
{
395
	struct tipc_net *tn;
396
	const struct proto_ops *ops;
P
Per Liden 已提交
397
	struct sock *sk;
398
	struct tipc_sock *tsk;
399
	struct tipc_msg *msg;
400 401

	/* Validate arguments */
P
Per Liden 已提交
402 403 404 405 406
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	switch (sock->type) {
	case SOCK_STREAM:
407
		ops = &stream_ops;
P
Per Liden 已提交
408 409
		break;
	case SOCK_SEQPACKET:
410
		ops = &packet_ops;
P
Per Liden 已提交
411 412 413
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
414
		ops = &msg_ops;
P
Per Liden 已提交
415
		break;
416 417
	default:
		return -EPROTOTYPE;
P
Per Liden 已提交
418 419
	}

420
	/* Allocate socket's protocol area */
421
	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
422
	if (sk == NULL)
P
Per Liden 已提交
423 424
		return -ENOMEM;

425
	tsk = tipc_sk(sk);
426 427
	tsk->max_pkt = MAX_PKT_DEFAULT;
	INIT_LIST_HEAD(&tsk->publications);
428
	INIT_LIST_HEAD(&tsk->cong_links);
429
	msg = &tsk->phdr;
430 431
	tn = net_generic(sock_net(sk), tipc_net_id);
	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
432
		      NAMED_H_SIZE, 0);
P
Per Liden 已提交
433

434 435 436
	/* Finish initializing socket data structures */
	sock->ops = ops;
	sock_init_data(sock, sk);
437
	tipc_set_sk_state(sk, TIPC_OPEN);
438
	if (tipc_sk_insert(tsk)) {
M
Masanari Iida 已提交
439
		pr_warn("Socket create failed; port number exhausted\n");
440 441 442
		return -EINVAL;
	}
	msg_set_origport(msg, tsk->portid);
443
	setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
444
	sk->sk_shutdown = 0;
445
	sk->sk_backlog_rcv = tipc_backlog_rcv;
446
	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
447 448
	sk->sk_data_ready = tipc_data_ready;
	sk->sk_write_space = tipc_write_space;
449
	sk->sk_destruct = tipc_sock_destruct;
450 451
	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
	atomic_set(&tsk->dupl_rcvcnt, 0);
452

453 454 455 456
	/* Start out with safe limits until we receive an advertised window */
	tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
	tsk->rcv_win = tsk->snd_win;

457
	if (tipc_sk_type_connectionless(sk)) {
458
		tsk_set_unreturnable(tsk, true);
459
		if (sock->type == SOCK_DGRAM)
460
			tsk_set_unreliable(tsk, true);
461
	}
462

P
Per Liden 已提交
463 464 465
	return 0;
}

466 467 468 469 470 471 472
/* tipc_sk_callback - RCU callback dropping the final socket reference */
static void tipc_sk_callback(struct rcu_head *head)
{
	struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);

	sock_put(&tsk->sk);
}

473 474 475 476 477 478
/* Caller should hold socket lock for the socket. */
static void __tipc_shutdown(struct socket *sock, int error)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
479
	long timeout = CONN_TIMEOUT_DEFAULT;
480 481 482
	u32 dnode = tsk_peer_node(tsk);
	struct sk_buff *skb;

483 484 485 486
	/* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */
	tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
					    !tsk_conn_cong(tsk)));

487 488 489 490 491 492
	/* Reject all unreceived messages, except on an active connection
	 * (which disconnects locally & sends a 'FIN+' to peer).
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (TIPC_SKB_CB(skb)->bytes_read) {
			kfree_skb(skb);
493
			continue;
494
		}
495 496 497 498 499 500
		if (!tipc_sk_type_connectionless(sk) &&
		    sk->sk_state != TIPC_DISCONNECTING) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			tipc_node_remove_conn(net, dnode, tsk->portid);
		}
		tipc_sk_respond(sk, skb, error);
501
	}
502 503 504 505

	if (tipc_sk_type_connectionless(sk))
		return;

506 507 508 509 510 511 512
	if (sk->sk_state != TIPC_DISCONNECTING) {
		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
				      TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
				      tsk_own_node(tsk), tsk_peer_port(tsk),
				      tsk->portid, error);
		if (skb)
			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
513 514
		tipc_node_remove_conn(net, dnode, tsk->portid);
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
515 516 517
	}
}

P
Per Liden 已提交
518
/**
519
 * tipc_release - destroy a TIPC socket
P
Per Liden 已提交
520 521 522 523 524 525 526
 * @sock: socket to destroy
 *
 * This routine cleans up any messages that are still queued on the socket.
 * For DGRAM and RDM socket types, all queued messages are rejected.
 * For SEQPACKET and STREAM socket types, the first message is rejected
 * and any others are discarded.  (If the first message on a STREAM socket
 * is partially-read, it is discarded and the next one is rejected instead.)
527
 *
P
Per Liden 已提交
528 529 530 531 532 533
 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 * are returned or discarded according to the "destination droppable" setting
 * specified for the message by the sender.
 *
 * Returns 0 on success, errno otherwise
 */
534
static int tipc_release(struct socket *sock)
P
Per Liden 已提交
535 536
{
	struct sock *sk = sock->sk;
537
	struct tipc_sock *tsk;
P
Per Liden 已提交
538

539 540 541 542 543
	/*
	 * Exit if socket isn't fully initialized (occurs when a failed accept()
	 * releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
P
Per Liden 已提交
544
		return 0;
545

546
	tsk = tipc_sk(sk);
547 548
	lock_sock(sk);

549 550
	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
	sk->sk_shutdown = SHUTDOWN_MASK;
551
	tipc_sk_withdraw(tsk, 0, NULL);
552
	sk_stop_timer(sk, &sk->sk_timer);
553
	tipc_sk_remove(tsk);
P
Per Liden 已提交
554

555 556
	/* Reject any messages that accumulated in backlog queue */
	release_sock(sk);
557 558
	u32_list_purge(&tsk->cong_links);
	tsk->cong_link_cnt = 0;
559
	call_rcu(&tsk->rcu, tipc_sk_callback);
560
	sock->sk = NULL;
P
Per Liden 已提交
561

562
	return 0;
P
Per Liden 已提交
563 564 565
}

/**
566
 * tipc_bind - associate or disassociate TIPC name(s) with a socket
P
Per Liden 已提交
567 568 569
 * @sock: socket structure
 * @uaddr: socket address describing name(s) and desired operation
 * @uaddr_len: size of socket address data structure
570
 *
P
Per Liden 已提交
571 572 573
 * Name and name sequence binding is indicated using a positive scope value;
 * a negative scope value unbinds the specified name.  Specifying no name
 * (i.e. a socket address length of 0) unbinds all names from the socket.
574
 *
P
Per Liden 已提交
575
 * Returns 0 on success, errno otherwise
576 577 578
 *
 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 *       access any non-constant socket information.
P
Per Liden 已提交
579
 */
580 581
static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
		     int uaddr_len)
P
Per Liden 已提交
582
{
583
	struct sock *sk = sock->sk;
P
Per Liden 已提交
584
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
585
	struct tipc_sock *tsk = tipc_sk(sk);
586
	int res = -EINVAL;
P
Per Liden 已提交
587

588 589
	lock_sock(sk);
	if (unlikely(!uaddr_len)) {
590
		res = tipc_sk_withdraw(tsk, 0, NULL);
591 592
		goto exit;
	}
593

594 595 596 597 598 599 600 601
	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
		res = -EINVAL;
		goto exit;
	}
	if (addr->family != AF_TIPC) {
		res = -EAFNOSUPPORT;
		goto exit;
	}
P
Per Liden 已提交
602 603 604

	if (addr->addrtype == TIPC_ADDR_NAME)
		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
605 606 607 608
	else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
		res = -EAFNOSUPPORT;
		goto exit;
	}
609

610
	if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
611
	    (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
612 613 614 615
	    (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
		res = -EACCES;
		goto exit;
	}
616

617
	res = (addr->scope > 0) ?
618 619
		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
620 621 622
exit:
	release_sock(sk);
	return res;
P
Per Liden 已提交
623 624
}

625
/**
626
 * tipc_getname - get port ID of socket or peer socket
P
Per Liden 已提交
627 628 629
 * @sock: socket structure
 * @uaddr: area for returned socket address
 * @uaddr_len: area for returned length of socket address
630
 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
631
 *
P
Per Liden 已提交
632
 * Returns 0 on success, errno otherwise
633
 *
634 635
 * NOTE: This routine doesn't need to take the socket lock since it only
 *       accesses socket information that is unchanging (or which changes in
636
 *       a completely predictable manner).
P
Per Liden 已提交
637
 */
638 639
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
			int *uaddr_len, int peer)
P
Per Liden 已提交
640 641
{
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
642 643
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
644
	struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
P
Per Liden 已提交
645

646
	memset(addr, 0, sizeof(*addr));
647
	if (peer) {
648
		if ((!tipc_sk_connected(sk)) &&
649
		    ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
650
			return -ENOTCONN;
651 652
		addr->addr.id.ref = tsk_peer_port(tsk);
		addr->addr.id.node = tsk_peer_node(tsk);
653
	} else {
654
		addr->addr.id.ref = tsk->portid;
655
		addr->addr.id.node = tn->own_addr;
656
	}
P
Per Liden 已提交
657 658 659 660 661 662 663

	*uaddr_len = sizeof(*addr);
	addr->addrtype = TIPC_ADDR_ID;
	addr->family = AF_TIPC;
	addr->scope = 0;
	addr->addr.name.domain = 0;

664
	return 0;
P
Per Liden 已提交
665 666 667
}

/**
668
 * tipc_poll - read and possibly block on pollmask
P
Per Liden 已提交
669 670 671 672
 * @file: file structure associated with the socket
 * @sock: socket for which to calculate the poll bits
 * @wait: ???
 *
673 674 675 676 677 678 679 680
 * Returns pollmask value
 *
 * COMMENTARY:
 * It appears that the usual socket locking mechanisms are not useful here
 * since the pollmask info is potentially out-of-date the moment this routine
 * exits.  TCP and other protocols seem to rely on higher level poll routines
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
681 682 683
 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 * imply that the operation will succeed, merely that it should be performed
 * and will not block.
P
Per Liden 已提交
684
 */
685 686
static unsigned int tipc_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	u32 revents = 0;

	sock_poll_wait(file, sk_sleep(sk), wait);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		revents |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		revents |= POLLHUP;

	switch (sk->sk_state) {
	case TIPC_ESTABLISHED:
		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
			revents |= POLLOUT;
		/* fall thru' */
	case TIPC_LISTEN:
	case TIPC_CONNECTING:
		if (!skb_queue_empty(&sk->sk_receive_queue))
			revents |= POLLIN | POLLRDNORM;
		break;
	case TIPC_OPEN:
		if (!tsk->cong_link_cnt)
			revents |= POLLOUT;
		if (tipc_sk_type_connectionless(sk) &&
		    !skb_queue_empty(&sk->sk_receive_queue))
			revents |= POLLIN | POLLRDNORM;
		break;
	case TIPC_DISCONNECTING:
		/* Deliberate overwrite of any shutdown bits set above */
		revents = POLLIN | POLLRDNORM | POLLHUP;
		break;
	}

	return revents;
}

724 725 726 727
/**
 * tipc_sendmcast - send multicast message
 * @sock: socket structure
 * @seq: destination address
728
 * @msg: message to send
729 730
 * @dlen: length of data to send
 * @timeout: timeout to wait for wakeup
731 732 733 734 735
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
			  struct msghdr *msg, size_t dlen, long timeout)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	int mtu = tipc_bcast_get_mtu(net);
	struct sk_buff_head pkts;
	int rc;

	/* Block until any earlier link congestion has cleared */
	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;

	/* Prepare the preformatted header for a multicast lookup */
	msg_set_type(hdr, TIPC_MCAST_MSG);
	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_nametype(hdr, seq->type);
	msg_set_namelower(hdr, seq->lower);
	msg_set_nameupper(hdr, seq->upper);
	msg_set_hdr_sz(hdr, MCAST_H_SIZE);

	/* Build packet chain and hand it to the broadcast layer */
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	rc = tipc_bcast_xmit(net, &pkts);
	if (unlikely(rc == -ELINKCONG)) {
		/* Remember the congestion; the send itself still succeeds */
		tsk->cong_link_cnt = 1;
		rc = 0;
	}

	return rc ? rc : dlen;
}

773 774 775 776 777 778
/**
 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
 * @arrvq: queue with arriving messages, to be cloned after destination lookup
 * @inputq: queue with cloned messages, delivered to socket after dest lookup
 *
 * Multi-threaded: parallel calls with reference to same queues may occur
779
 */
780 781
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
		       struct sk_buff_head *inputq)
782
{
783
	struct tipc_msg *msg;
784
	struct list_head dports;
785
	u32 portid;
786
	u32 scope = TIPC_CLUSTER_SCOPE;
787 788 789
	struct sk_buff_head tmpq;
	uint hsz;
	struct sk_buff *skb, *_skb;
790

791
	__skb_queue_head_init(&tmpq);
792
	INIT_LIST_HEAD(&dports);
793

794 795 796 797 798 799 800 801 802 803 804 805
	skb = tipc_skb_peek(arrvq, &inputq->lock);
	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
		msg = buf_msg(skb);
		hsz = skb_headroom(skb) + msg_hdr_sz(msg);

		if (in_own_node(net, msg_orignode(msg)))
			scope = TIPC_NODE_SCOPE;

		/* Create destination port list and message clones: */
		tipc_nametbl_mc_translate(net,
					  msg_nametype(msg), msg_namelower(msg),
					  msg_nameupper(msg), scope, &dports);
806 807
		portid = u32_pop(&dports);
		for (; portid; portid = u32_pop(&dports)) {
808 809 810 811 812 813 814
			_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
			if (_skb) {
				msg_set_destport(buf_msg(_skb), portid);
				__skb_queue_tail(&tmpq, _skb);
				continue;
			}
			pr_warn("Failed to clone mcast rcv buffer\n");
815
		}
816 817 818 819 820 821 822 823 824
		/* Append to inputq if not already done by other thread */
		spin_lock_bh(&inputq->lock);
		if (skb_peek(arrvq) == skb) {
			skb_queue_splice_tail_init(&tmpq, inputq);
			kfree_skb(__skb_dequeue(arrvq));
		}
		spin_unlock_bh(&inputq->lock);
		__skb_queue_purge(&tmpq);
		kfree_skb(skb);
825
	}
826
	tipc_sk_rcv(net, inputq);
827 828
}

829 830 831
/**
 * tipc_sk_proto_rcv - receive a connection mng protocol message
 * @tsk: receiving socket
832
 * @skb: pointer to message buffer.
833
 */
J
Jon Paul Maloy 已提交
834 835
static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
			      struct sk_buff_head *xmitq)
836
{
837
	struct sock *sk = &tsk->sk;
J
Jon Paul Maloy 已提交
838
	u32 onode = tsk_own_node(tsk);
839 840
	struct tipc_msg *hdr = buf_msg(skb);
	int mtyp = msg_type(hdr);
841
	bool conn_cong;
842

843
	/* Ignore if connection cannot be validated: */
844
	if (!tsk_peer_msg(tsk, hdr))
845 846
		goto exit;

847
	tsk->probe_unacked = false;
848

849 850
	if (mtyp == CONN_PROBE) {
		msg_set_type(hdr, CONN_PROBE_REPLY);
J
Jon Paul Maloy 已提交
851 852
		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
			__skb_queue_tail(xmitq, skb);
853 854
		return;
	} else if (mtyp == CONN_ACK) {
855
		conn_cong = tsk_conn_cong(tsk);
856 857 858
		tsk->snt_unacked -= msg_conn_ack(hdr);
		if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
			tsk->snd_win = msg_adv_win(hdr);
859
		if (conn_cong)
860 861 862
			sk->sk_write_space(sk);
	} else if (mtyp != CONN_PROBE_REPLY) {
		pr_warn("Received unknown CONN_PROTO msg\n");
863 864
	}
exit:
865
	kfree_skb(skb);
866 867
}

P
Per Liden 已提交
868
/**
869
 * tipc_sendmsg - send message in connectionless manner
P
Per Liden 已提交
870 871
 * @sock: socket structure
 * @m: message to send
872
 * @dsz: amount of user data to be sent
873
 *
P
Per Liden 已提交
874
 * Message must have an destination specified explicitly.
875
 * Used for SOCK_RDM and SOCK_DGRAM messages,
P
Per Liden 已提交
876 877
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
878
 *
P
Per Liden 已提交
879 880
 * Returns the number of bytes sent on success, or errno otherwise
 */
881
static int tipc_sendmsg(struct socket *sock,
			struct msghdr *m, size_t dsz)
{
	struct sock *sk = sock->sk;
	int rc;

	/* Serialize against other socket operations, then delegate */
	lock_sock(sk);
	rc = __tipc_sendmsg(sock, m, dsz);
	release_sock(sk);

	return rc;
}

894
/* __tipc_sendmsg - connectionless send path; caller must hold the socket
 * lock (see tipc_sendmsg()).
 */
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct list_head *clinks = &tsk->cong_links;
	bool syn = !tipc_sk_type_connectionless(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq *seq;
	struct sk_buff_head pkts;
	u32 type, inst, domain;
	u32 dnode, dport;
	int mtu, rc;

	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
		return -EMSGSIZE;

	/* No explicit destination: fall back to the one stored by a prior
	 * connect() on a DGRAM/RDM socket (tsk->peer).
	 */
	if (unlikely(!dest)) {
		dest = &tsk->peer;
		if (!syn || dest->family != AF_TIPC)
			return -EDESTADDRREQ;
	}

	if (unlikely(m->msg_namelen < sizeof(*dest)))
		return -EINVAL;

	if (unlikely(dest->family != AF_TIPC))
		return -EINVAL;

	/* Implicit connection setup ('SYN') on connection-oriented sockets:
	 * only legal from the TIPC_OPEN state, and remember the destination
	 * name so it can be reported as ancillary data later.
	 */
	if (unlikely(syn)) {
		if (sk->sk_state == TIPC_LISTEN)
			return -EPIPE;
		if (sk->sk_state != TIPC_OPEN)
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
		if (dest->addrtype == TIPC_ADDR_NAME) {
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
		}
	}

	seq = &dest->addr.nameseq;
	if (dest->addrtype == TIPC_ADDR_MCAST)
		return tipc_sendmcast(sock, seq, m, dlen, timeout);

	if (dest->addrtype == TIPC_ADDR_NAME) {
		type = dest->addr.name.name.type;
		inst = dest->addr.name.name.instance;
		domain = dest->addr.name.domain;
		dnode = domain;
		msg_set_type(hdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
		msg_set_nametype(hdr, type);
		msg_set_nameinst(hdr, inst);
		msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
		/* Resolve name to a concrete <node, port>; may update dnode */
		dport = tipc_nametbl_translate(net, type, inst, &dnode);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dport);
		if (unlikely(!dport && !dnode))
			return -EHOSTUNREACH;

	} else if (dest->addrtype == TIPC_ADDR_ID) {
		dnode = dest->addr.id.node;
		msg_set_type(hdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(hdr, 0);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dest->addr.id.ref);
		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
	}
	/* NOTE(review): an addrtype other than MCAST/NAME/ID would leave
	 * dnode uninitialized here - presumably validated by the caller or
	 * socket layer; confirm upstream.
	 */

	/* Block or return if destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode));
	if (unlikely(rc))
		return rc;

	skb_queue_head_init(&pkts);
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	/* Link congestion is not an error for the caller: remember the
	 * congested destination and report full success.
	 */
	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
		u32_push(clinks, dnode);
		tsk->cong_link_cnt++;
		rc = 0;
	}

	if (unlikely(syn && !rc))
		tipc_set_sk_state(sk, TIPC_CONNECTING);

	return rc ? rc : dlen;
}

991
/**
992
 * tipc_sendstream - send stream-oriented data
P
Per Liden 已提交
993
 * @sock: socket structure
994 995
 * @m: data to send
 * @dsz: total length of data to be transmitted
996
 *
997
 * Used for SOCK_STREAM data.
998
 *
999 1000
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
P
Per Liden 已提交
1001
 */
1002
static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
1003 1004 1005 1006 1007
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
1008
	ret = __tipc_sendstream(sock, m, dsz);
1009 1010 1011 1012 1013
	release_sock(sk);

	return ret;
}

1014
/* __tipc_sendstream - stream send path; caller must hold the socket lock
 * (see tipc_sendstream()).
 */
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	struct sk_buff_head pkts;
	u32 dnode = tsk_peer_node(tsk);
	int send, sent = 0;
	int rc = 0;

	skb_queue_head_init(&pkts);

	if (unlikely(dlen > INT_MAX))
		return -EMSGSIZE;

	/* Handle implicit connection setup: a destination address on a
	 * stream socket means this is the 'SYN', sent as a single message.
	 */
	if (unlikely(dest)) {
		rc = __tipc_sendmsg(sock, m, dlen);
		if (dlen && (dlen == rc))
			tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
		return rc;
	}

	/* Send in chunks of at most TIPC_MAX_USER_MSG_SIZE, blocking while
	 * either the link or the peer's receive window is congested.
	 */
	do {
		rc = tipc_wait_for_cond(sock, &timeout,
					(!tsk->cong_link_cnt &&
					 !tsk_conn_cong(tsk) &&
					 tipc_sk_connected(sk)));
		if (unlikely(rc))
			break;

		send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
		rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
		if (unlikely(rc != send))
			break;

		/* Link congestion is handled by waiting in the next loop
		 * iteration, not by failing the call.
		 */
		rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
		if (unlikely(rc == -ELINKCONG)) {
			tsk->cong_link_cnt = 1;
			rc = 0;
		}
		if (likely(!rc)) {
			tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
			sent += send;
		}
	} while (sent < dlen && !rc);

	/* Partial success reports the bytes actually sent */
	return rc ? rc : sent;
}

1067
/**
1068
 * tipc_send_packet - send a connection-oriented message
P
Per Liden 已提交
1069
 * @sock: socket structure
1070 1071
 * @m: message to send
 * @dsz: length of data to be transmitted
1072
 *
1073
 * Used for SOCK_SEQPACKET messages.
1074
 *
1075
 * Returns the number of bytes sent on success, or errno otherwise
P
Per Liden 已提交
1076
 */
1077
static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
P
Per Liden 已提交
1078
{
1079 1080
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;
P
Per Liden 已提交
1081

1082
	return tipc_sendstream(sock, m, dsz);
P
Per Liden 已提交
1083 1084
}

1085
/* tipc_sk_finish_conn - complete the setup of a connection
 * @tsk: socket being connected
 * @peer_port: port reference of the peer socket
 * @peer_node: network address of the peer node
 */
static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
				u32 peer_node)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *msg = &tsk->phdr;

	/* Pre-fill the socket's message header template for this peer */
	msg_set_destnode(msg, peer_node);
	msg_set_destport(msg, peer_port);
	msg_set_type(msg, TIPC_CONN_MSG);
	msg_set_lookup_scope(msg, 0);
	msg_set_hdr_sz(msg, SHORT_H_SIZE);

	/* Arm the connection probe timer and register the connection */
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
	tipc_set_sk_state(sk, TIPC_ESTABLISHED);
	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
	tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
		return;

	/* Fall back to message based flow control */
	tsk->rcv_win = FLOWCTL_MSG_WIN;
	tsk->snd_win = FLOWCTL_MSG_WIN;
}

/**
 * set_orig_addr - capture sender's address for received message
 * @m: descriptor for message info
 * @msg: received message header
1117
 *
P
Per Liden 已提交
1118 1119
 * Note: Address is not captured if not requested by receiver.
 */
S
Sam Ravnborg 已提交
1120
static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
P
Per Liden 已提交
1121
{
1122
	DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);
P
Per Liden 已提交
1123

1124
	if (addr) {
P
Per Liden 已提交
1125 1126
		addr->family = AF_TIPC;
		addr->addrtype = TIPC_ADDR_ID;
1127
		memset(&addr->addr, 0, sizeof(addr->addr));
P
Per Liden 已提交
1128 1129
		addr->addr.id.ref = msg_origport(msg);
		addr->addr.id.node = msg_orignode(msg);
1130 1131
		addr->addr.name.domain = 0;	/* could leave uninitialized */
		addr->scope = 0;		/* could leave uninitialized */
P
Per Liden 已提交
1132 1133 1134 1135 1136
		m->msg_namelen = sizeof(struct sockaddr_tipc);
	}
}

/**
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
 * @m: descriptor for message info
 * @msg: received message header
 * @tsk: TIPC port associated with message
 *
 * Note: Ancillary data is not captured if not requested by receiver.
 *
 * Returns 0 if successful, otherwise errno
 */
static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
				 struct tipc_sock *tsk)
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
	int has_name;
	int res;

	/* Receiver did not ask for ancillary data */
	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
		/* TIPC_ERRINFO carries {error code, returned-data length} */
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
			return res;
		if (anc_data[1]) {
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		/* Named message addresses a single instance: lower == upper */
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
		/* Destination name was recorded at connection setup time */
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
		break;
	default:
		has_name = 0;
	}
	if (has_name) {
		/* TIPC_DESTNAME carries {type, lower, upper} = 12 bytes */
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}

	return 0;
}

1207
/* tipc_sk_send_ack - send a connection-level acknowledge to the peer,
 * reporting how many message units were consumed since the last ack.
 */
static void tipc_sk_send_ack(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct sk_buff *skb = NULL;
	struct tipc_msg *msg;
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);

	if (!tipc_sk_connected(sk))
		return;
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
	/* Best effort: silently skip the ack if allocation fails */
	if (!skb)
		return;
	msg = buf_msg(skb);
	msg_set_conn_ack(msg, tsk->rcv_unacked);
	tsk->rcv_unacked = 0;

	/* Adjust to and advertize the correct window limit */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
		tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
		msg_set_adv_win(msg, tsk->rcv_win);
	}
	tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
}

1235
/* tipc_wait_for_rcvmsg - wait until the receive queue is non-empty or the
 * timeout expires. Called with the socket lock held; the lock is dropped
 * while sleeping. Remaining time is written back through @timeop.
 *
 * Returns 0 when a message is available, otherwise a negative errno.
 */
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	long timeo = *timeop;
	int err;

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			if (sk->sk_shutdown & RCV_SHUTDOWN) {
				err = -ENOTCONN;
				break;
			}
			/* Drop the socket lock while sleeping so the
			 * receive path can deliver messages.
			 */
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		/* Re-check the queue after waking: data may have arrived */
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	*timeop = timeo;
	return err;
}

1268
/**
 * tipc_recvmsg - receive packet-oriented message
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
			int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *buf;
	struct tipc_msg *msg;
	bool is_connectionless = tipc_sk_type_connectionless(sk);
	long timeo;
	unsigned int sz;
	u32 err;
	int res, hlen;

	/* Catch invalid receive requests */
	if (unlikely(!buf_len))
		return -EINVAL;

	lock_sock(sk);

	/* Connection-oriented socket that was never connected */
	if (!is_connectionless && unlikely(sk->sk_state == TIPC_OPEN)) {
		res = -ENOTCONN;
		goto exit;
	}

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
restart:

	/* Look for a message in receive queue; wait if necessary */
	res = tipc_wait_for_rcvmsg(sock, &timeo);
	if (res)
		goto exit;

	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
	hlen = msg_hdr_sz(msg);
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
		tsk_advance_rx_queue(sk);
		goto restart;
	}

	/* Capture sender's address (optional) */
	set_orig_addr(m, msg);

	/* Capture ancillary data (optional) */
	res = tipc_sk_anc_data_recv(m, msg, tsk);
	if (res)
		goto exit;

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
		if (unlikely(buf_len < sz)) {
			sz = buf_len;
			m->msg_flags |= MSG_TRUNC;
		}
		res = skb_copy_datagram_msg(buf, hlen, m, sz);
		if (res)
			goto exit;
		res = sz;
	} else {
		/* Errored message: report 0 (graceful shutdown or the
		 * caller asked for ancillary data), else a reset.
		 */
		if (is_connectionless || err == TIPC_CONN_SHUTDOWN ||
		    m->msg_control)
			res = 0;
		else
			res = -ECONNRESET;
	}

	/* MSG_PEEK leaves the message on the queue and acks nothing */
	if (unlikely(flags & MSG_PEEK))
		goto exit;

	if (likely(!is_connectionless)) {
		/* Ack consumed data once a quarter of the window is used */
		tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
		if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
			tipc_sk_send_ack(tsk);
	}
	tsk_advance_rx_queue(sk);
exit:
	release_sock(sk);
	return res;
}

1364
/**
 * tipc_recv_stream - receive stream-oriented data
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
			    size_t buf_len, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *buf;
	struct tipc_msg *msg;
	long timeo;
	unsigned int sz;
	int target;
	int sz_copied = 0;
	u32 err;
	int res = 0, hlen;

	/* Catch invalid receive attempts */
	if (unlikely(!buf_len))
		return -EINVAL;

	lock_sock(sk);

	if (unlikely(sk->sk_state == TIPC_OPEN)) {
		res = -ENOTCONN;
		goto exit;
	}

	/* target = minimum bytes to return (SO_RCVLOWAT / MSG_WAITALL) */
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

restart:
	/* Look for a message in receive queue; wait if necessary */
	res = tipc_wait_for_rcvmsg(sock, &timeo);
	if (res)
		goto exit;

	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
	hlen = msg_hdr_sz(msg);
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
		tsk_advance_rx_queue(sk);
		goto restart;
	}

	/* Optionally capture sender's address & ancillary data of first msg */
	if (sz_copied == 0) {
		set_orig_addr(m, msg);
		res = tipc_sk_anc_data_recv(m, msg, tsk);
		if (res)
			goto exit;
	}

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
		/* bytes_read remembers how much of this skb an earlier
		 * (partial) read already consumed.
		 */
		u32 offset = TIPC_SKB_CB(buf)->bytes_read;
		u32 needed;
		int sz_to_copy;

		sz -= offset;
		needed = (buf_len - sz_copied);
		sz_to_copy = min(sz, needed);

		res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
		if (res)
			goto exit;

		sz_copied += sz_to_copy;

		/* Buffer full before this skb was exhausted: record the
		 * partial-read offset (unless peeking) and stop.
		 */
		if (sz_to_copy < sz) {
			if (!(flags & MSG_PEEK))
				TIPC_SKB_CB(buf)->bytes_read =
					offset + sz_to_copy;
			goto exit;
		}
	} else {
		if (sz_copied != 0)
			goto exit; /* can't add error msg to valid data */

		if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
			res = 0;
		else
			res = -ECONNRESET;
	}

	if (unlikely(flags & MSG_PEEK))
		goto exit;

	/* Ack consumed data once a quarter of the window is used */
	tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
	if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
		tipc_sk_send_ack(tsk);
	tsk_advance_rx_queue(sk);

	/* Loop around if more data is required */
	if ((sz_copied < buf_len) &&	/* didn't get all requested data */
	    (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sz_copied < target)) &&	/* and more is ready or required */
	    (!err))			/* and haven't reached a FIN */
		goto restart;

exit:
	release_sock(sk);
	return sz_copied ? sz_copied : res;
}

1482 1483 1484 1485 1486 1487 1488 1489 1490 1491
/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
H
Herbert Xu 已提交
1492
	if (skwq_has_sleeper(wq))
1493 1494 1495 1496 1497 1498 1499 1500 1501 1502
		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 * @len: the length of messages
 */
1503
static void tipc_data_ready(struct sock *sk)
1504 1505 1506 1507 1508
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
H
Herbert Xu 已提交
1509
	if (skwq_has_sleeper(wq))
1510 1511 1512 1513 1514
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
						POLLRDNORM | POLLRDBAND);
	rcu_read_unlock();
}

1515 1516 1517 1518 1519
/* tipc_sock_destruct - release any buffers still queued when the socket
 * is finally destroyed.
 */
static void tipc_sock_destruct(struct sock *sk)
{
	__skb_queue_purge(&sk->sk_receive_queue);
}

1520 1521
/**
 * filter_connect - Handle all incoming messages for a connection-based socket
 * @tsk: TIPC socket
 * @skb: pointer to message buffer. Set to NULL if buffer is consumed
 *
 * Returns true if everything ok, false otherwise
 */
static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *hdr = buf_msg(skb);

	/* Multicast never belongs on a connection-oriented socket */
	if (unlikely(msg_mcast(hdr)))
		return false;

	switch (sk->sk_state) {
	case TIPC_CONNECTING:
		/* Accept only ACK or NACK message */
		if (unlikely(!msg_connected(hdr)))
			return false;

		/* NACK: connection refused by peer */
		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = ECONNREFUSED;
			return true;
		}

		if (unlikely(!msg_isdata(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = EINVAL;
			return true;
		}

		tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
		msg_set_importance(&tsk->phdr, msg_importance(hdr));

		/* If 'ACK+' message, add to socket receive queue */
		if (msg_data_sz(hdr))
			return true;

		/* If empty 'ACK-' message, wake up sleeping connect() */
		if (waitqueue_active(sk_sleep(sk)))
			wake_up_interruptible(sk_sleep(sk));

		/* 'ACK-' message is neither accepted nor rejected: */
		msg_set_dest_droppable(hdr, 1);
		return false;

	case TIPC_OPEN:
	case TIPC_DISCONNECTING:
		break;
	case TIPC_LISTEN:
		/* Accept only SYN message */
		if (!msg_connected(hdr) && !(msg_errcode(hdr)))
			return true;
		break;
	case TIPC_ESTABLISHED:
		/* Accept only connection-based messages sent by peer */
		if (unlikely(!tsk_peer_msg(tsk, hdr)))
			return false;

		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			/* Let timer expire on it's own */
			tipc_node_remove_conn(net, tsk_peer_node(tsk),
					      tsk->portid);
			sk->sk_state_change(sk);
		}
		return true;
	default:
		pr_err("Unknown sk_state %u\n", sk->sk_state);
	}

	return false;
}

1597 1598 1599
/**
 * rcvbuf_limit - get proper overload limit of socket receive queue
 * @sk: socket
1600
 * @skb: message
1601
 *
1602 1603
 * For connection oriented messages, irrespective of importance,
 * default queue limit is 2 MB.
1604
 *
1605 1606
 * For connectionless messages, queue limits are based on message
 * importance as follows:
1607
 *
1608 1609 1610 1611
 * TIPC_LOW_IMPORTANCE       (2 MB)
 * TIPC_MEDIUM_IMPORTANCE    (4 MB)
 * TIPC_HIGH_IMPORTANCE      (8 MB)
 * TIPC_CRITICAL_IMPORTANCE  (16 MB)
1612 1613 1614
 *
 * Returns overload limit according to corresponding message importance
 */
1615
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1616
{
1617 1618 1619 1620 1621
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);

	if (unlikely(!msg_connected(hdr)))
		return sk->sk_rcvbuf << msg_importance(hdr);
1622

1623 1624
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return sk->sk_rcvbuf;
1625

1626
	return FLOWCTL_MSG_LIM;
1627 1628
}

1629
/**
 * filter_rcv - validate incoming message
 * @sk: socket
 * @skb: pointer to message.
 *
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
 * Called with socket lock already taken
 *
 * Returns true if message was added to socket receive queue, otherwise false
 */
static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
		       struct sk_buff_head *xmitq)
{
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);
	unsigned int limit = rcvbuf_limit(sk, skb);
	int err = TIPC_OK;
	int usr = msg_user(hdr);
	u32 onode;

	/* Connection-protocol messages are consumed by the protocol layer */
	if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
		tipc_sk_proto_rcv(tsk, skb, xmitq);
		return false;
	}

	/* Wakeup: a previously congested link is available again */
	if (unlikely(usr == SOCK_WAKEUP)) {
		onode = msg_orignode(hdr);
		kfree_skb(skb);
		u32_del(&tsk->cong_links, onode);
		tsk->cong_link_cnt--;
		sk->sk_write_space(sk);
		return false;
	}

	/* Drop if illegal message type */
	if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) {
		kfree_skb(skb);
		return false;
	}

	/* Reject if wrong message type for current socket state */
	if (tipc_sk_type_connectionless(sk)) {
		if (msg_connected(hdr)) {
			err = TIPC_ERR_NO_PORT;
			goto reject;
		}
	} else if (unlikely(!filter_connect(tsk, skb))) {
		err = TIPC_ERR_NO_PORT;
		goto reject;
	}

	/* Reject message if there isn't room to queue it */
	if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) {
		err = TIPC_ERR_OVERLOAD;
		goto reject;
	}

	/* Enqueue message */
	TIPC_SKB_CB(skb)->bytes_read = 0;
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	skb_set_owner_r(skb, sk);

	sk->sk_data_ready(sk);
	return true;

reject:
	/* Bounce the message back to its sender with an error code;
	 * tipc_msg_reverse() takes ownership of skb on success.
	 */
	if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
		__skb_queue_tail(xmitq, skb);
	return false;
}
P
Per Liden 已提交
1701

1702
/**
1703
 * tipc_backlog_rcv - handle incoming message from backlog queue
1704
 * @sk: socket
1705
 * @skb: message
1706
 *
1707
 * Caller must hold socket lock
1708 1709 1710
 *
 * Returns 0
 */
1711
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
1712
{
1713
	unsigned int truesize = skb->truesize;
J
Jon Paul Maloy 已提交
1714 1715
	struct sk_buff_head xmitq;
	u32 dnode, selector;
1716

J
Jon Paul Maloy 已提交
1717 1718 1719
	__skb_queue_head_init(&xmitq);

	if (likely(filter_rcv(sk, skb, &xmitq))) {
1720
		atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
J
Jon Paul Maloy 已提交
1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731
		return 0;
	}

	if (skb_queue_empty(&xmitq))
		return 0;

	/* Send response/rejected message */
	skb = __skb_dequeue(&xmitq);
	dnode = msg_destnode(buf_msg(skb));
	selector = msg_origport(buf_msg(skb));
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
1732 1733 1734
	return 0;
}

1735
/**
 * tipc_sk_enqueue - extract all buffers with destination 'dport' from
 *                   inputq and try adding them to socket or backlog queue
 * @inputq: list of incoming buffers with potentially different destinations
 * @sk: socket where the buffers should be enqueued
 * @dport: port number for the socket
 *
 * Caller must hold socket lock
 */
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
			    u32 dport, struct sk_buff_head *xmitq)
{
	/* Bound the time spent holding the socket spinlock (~2 jiffies) */
	unsigned long time_limit = jiffies + 2;
	struct sk_buff *skb;
	unsigned int lim;
	atomic_t *dcnt;
	u32 onode;

	while (skb_queue_len(inputq)) {
		if (unlikely(time_after_eq(jiffies, time_limit)))
			return;

		skb = tipc_skb_dequeue(inputq, dport);
		if (unlikely(!skb))
			return;

		/* Add message directly to receive queue if possible */
		if (!sock_owned_by_user(sk)) {
			filter_rcv(sk, skb, xmitq);
			continue;
		}

		/* Try backlog, compensating for double-counted bytes */
		dcnt = &tipc_sk(sk)->dupl_rcvcnt;
		if (!sk->sk_backlog.len)
			atomic_set(dcnt, 0);
		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
		if (likely(!sk_add_backlog(sk, skb, lim)))
			continue;

		/* Overload => reject message back to sender */
		onode = tipc_own_addr(sock_net(sk));
		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
			__skb_queue_tail(xmitq, skb);
		break;
	}
}

1783
/**
 * tipc_sk_rcv - handle a chain of incoming buffers
 * @inputq: buffer list containing the buffers
 * Consumes all buffers in list until inputq is empty
 * Note: may be called in multiple threads referring to the same queue
 */
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
{
	struct sk_buff_head xmitq;
	u32 dnode, dport = 0;
	int err;
	struct tipc_sock *tsk;
	struct sock *sk;
	struct sk_buff *skb;

	__skb_queue_head_init(&xmitq);
	while (skb_queue_len(inputq)) {
		/* Process one destination port at a time */
		dport = tipc_skb_peek_port(inputq, dport);
		tsk = tipc_sk_lookup(net, dport);

		if (likely(tsk)) {
			sk = &tsk->sk;
			/* trylock: if another thread owns the socket, leave
			 * its buffers on inputq for that thread to pick up.
			 */
			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
				spin_unlock_bh(&sk->sk_lock.slock);
			}
			/* Send pending response/rejected messages, if any */
			while ((skb = __skb_dequeue(&xmitq))) {
				dnode = msg_destnode(buf_msg(skb));
				tipc_node_xmit_skb(net, skb, dnode, dport);
			}
			sock_put(sk);
			continue;
		}

		/* No destination socket => dequeue skb if still there */
		skb = tipc_skb_dequeue(inputq, dport);
		if (!skb)
			return;

		/* Try secondary lookup if unresolved named message */
		err = TIPC_ERR_NO_PORT;
		if (tipc_msg_lookup_dest(net, skb, &err))
			goto xmit;

		/* Prepare for message rejection */
		if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
			continue;
xmit:
		dnode = msg_destnode(buf_msg(skb));
		tipc_node_xmit_skb(net, skb, dnode, dport);
	}
}

Y
Ying Xue 已提交
1837 1838
/* tipc_wait_for_connect - sleep until the socket leaves TIPC_CONNECTING,
 * the timeout in *timeo_p expires, a signal arrives, or a socket error
 * is raised. Remaining time is written back through @timeo_p.
 *
 * Returns 0 when connected, otherwise a negative errno.
 */
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = sock->sk;
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (!*timeo_p)
			return -ETIMEDOUT;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		add_wait_queue(sk_sleep(sk), &wait);
		done = sk_wait_event(sk, timeo_p,
				     sk->sk_state != TIPC_CONNECTING, &wait);
		remove_wait_queue(sk_sleep(sk), &wait);
	} while (!done);
	return 0;
}

P
Per Liden 已提交
1860
/**
 * tipc_connect - establish a connection to another TIPC port
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
 * @flags: file-related flags associated with socket
 *
 * For SOCK_DGRAM/SOCK_RDM sockets this only records (or clears) the
 * default destination address; for connection-oriented sockets it sends
 * a SYN and, unless non-blocking, waits for the handshake to complete.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
			int destlen, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
	/* Non-blocking connect uses a zero timeout */
	long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
	int previous;
	int res = 0;

	lock_sock(sk);

	/* DGRAM/RDM connect(), just save the destaddr */
	if (tipc_sk_type_connectionless(sk)) {
		if (dst->family == AF_UNSPEC) {
			/* AF_UNSPEC "disconnects": clear the saved peer */
			memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
		} else if (destlen != sizeof(struct sockaddr_tipc)) {
			res = -EINVAL;
		} else {
			memcpy(&tsk->peer, dest, destlen);
		}
		goto exit;
	}

	/*
	 * Reject connection attempt using multicast address
	 *
	 * Note: send_msg() validates the rest of the address fields,
	 *       so there's no need to do it here
	 */
	if (dst->addrtype == TIPC_ADDR_MCAST) {
		res = -EINVAL;
		goto exit;
	}

	/* Remember pre-send state to distinguish EALREADY/EINPROGRESS */
	previous = sk->sk_state;

	switch (sk->sk_state) {
	case TIPC_OPEN:
		/* Send a 'SYN-' to destination */
		m.msg_name = dest;
		m.msg_namelen = destlen;

		/* If connect is in non-blocking case, set MSG_DONTWAIT to
		 * indicate send_msg() is never blocked.
		 */
		if (!timeout)
			m.msg_flags = MSG_DONTWAIT;

		res = __tipc_sendmsg(sock, &m, 0);
		if ((res < 0) && (res != -EWOULDBLOCK))
			goto exit;

		/* Just entered TIPC_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		res = -EINPROGRESS;
		/* fall thru' */
	case TIPC_CONNECTING:
		if (!timeout) {
			if (previous == TIPC_CONNECTING)
				res = -EALREADY;
			goto exit;
		}
		timeout = msecs_to_jiffies(timeout);
		/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
		res = tipc_wait_for_connect(sock, &timeout);
		break;
	case TIPC_ESTABLISHED:
		res = -EISCONN;
		break;
	default:
		res = -EINVAL;
	}

exit:
	release_sock(sk);
	return res;
}

1951
/**
1952
 * tipc_listen - allow socket to listen for incoming connections
P
Per Liden 已提交
1953 1954
 * @sock: socket structure
 * @len: (unused)
1955
 *
P
Per Liden 已提交
1956 1957
 * Returns 0 on success, errno otherwise
 */
1958
static int tipc_listen(struct socket *sock, int len)
P
Per Liden 已提交
1959
{
1960 1961 1962 1963
	struct sock *sk = sock->sk;
	int res;

	lock_sock(sk);
1964
	res = tipc_set_sk_state(sk, TIPC_LISTEN);
1965
	release_sock(sk);
1966

1967
	return res;
P
Per Liden 已提交
1968 1969
}

Y
Ying Xue 已提交
1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983
/* tipc_wait_for_accept - block until a connection request arrives
 * @sock: listening socket
 * @timeo: maximum time to sleep, in jiffies (0 => non-blocking poll)
 *
 * Returns 0 when the receive queue is non-empty, -EAGAIN on timeout,
 * or a signal-dependent errno if interrupted.
 */
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int err;

	/* True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	*/
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			/* Drop the socket lock while asleep so the
			 * incoming SYN can actually be queued.
			 */
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

2003
/**
 * tipc_accept - wait for connection request
 * @sock: listening socket
 * @new_sock: new socket that is to be connected
 * @flags: file-related flags associated with socket
 *
 * Waits for a SYN on the listening socket, creates and connects a new
 * socket for it, and completes the handshake (ACK for an empty SYN, or
 * requeueing of a data-carrying SYN onto the new socket).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
{
	struct sock *new_sk, *sk = sock->sk;
	struct sk_buff *buf;
	struct tipc_sock *new_tsock;
	struct tipc_msg *msg;
	long timeo;
	int res;

	lock_sock(sk);

	if (sk->sk_state != TIPC_LISTEN) {
		res = -EINVAL;
		goto exit;
	}
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	res = tipc_wait_for_accept(sock, timeo);
	if (res)
		goto exit;

	/* Peek (don't dequeue yet) so a failed socket creation below
	 * leaves the SYN on the queue for a retry.
	 */
	buf = skb_peek(&sk->sk_receive_queue);

	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
	if (res)
		goto exit;
	security_sk_clone(sock->sk, new_sock->sk);

	new_sk = new_sock->sk;
	new_tsock = tipc_sk(new_sk);
	msg = buf_msg(buf);

	/* we lock on new_sk; but lockdep sees the lock on sk */
	lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);

	/*
	 * Reject any stray messages received by new socket
	 * before the socket lock was taken (very, very unlikely)
	 */
	tsk_rej_rx_queue(new_sk);

	/* Connect new socket to it's peer */
	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));

	/* Inherit importance and (if named) connection type/instance
	 * from the incoming SYN.
	 */
	tsk_set_importance(new_tsock, msg_importance(msg));
	if (msg_named(msg)) {
		new_tsock->conn_type = msg_nametype(msg);
		new_tsock->conn_instance = msg_nameinst(msg);
	}

	/*
	 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
	 * Respond to 'SYN+' by queuing it on new socket.
	 */
	if (!msg_data_sz(msg)) {
		struct msghdr m = {NULL,};

		tsk_advance_rx_queue(sk);
		__tipc_sendstream(new_sock, &m, 0);
	} else {
		__skb_dequeue(&sk->sk_receive_queue);
		__skb_queue_head(&new_sk->sk_receive_queue, buf);
		skb_set_owner_r(buf, new_sk);
	}
	release_sock(new_sk);
exit:
	release_sock(sk);
	return res;
}

/**
2081
 * tipc_shutdown - shutdown socket connection
P
Per Liden 已提交
2082
 * @sock: socket structure
2083
 * @how: direction to close (must be SHUT_RDWR)
P
Per Liden 已提交
2084 2085
 *
 * Terminates connection (if necessary), then purges socket's receive queue.
2086
 *
P
Per Liden 已提交
2087 2088
 * Returns 0 on success, errno otherwise
 */
2089
static int tipc_shutdown(struct socket *sock, int how)
P
Per Liden 已提交
2090
{
2091
	struct sock *sk = sock->sk;
P
Per Liden 已提交
2092 2093
	int res;

2094 2095
	if (how != SHUT_RDWR)
		return -EINVAL;
P
Per Liden 已提交
2096

2097
	lock_sock(sk);
P
Per Liden 已提交
2098

2099 2100
	__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
	sk->sk_shutdown = SEND_SHUTDOWN;
P
Per Liden 已提交
2101

2102
	if (sk->sk_state == TIPC_DISCONNECTING) {
2103
		/* Discard any unreceived messages */
2104
		__skb_queue_purge(&sk->sk_receive_queue);
2105 2106 2107

		/* Wake up anyone sleeping in poll */
		sk->sk_state_change(sk);
P
Per Liden 已提交
2108
		res = 0;
2109
	} else {
P
Per Liden 已提交
2110 2111 2112
		res = -ENOTCONN;
	}

2113
	release_sock(sk);
P
Per Liden 已提交
2114 2115 2116
	return res;
}

2117
/* tipc_sk_timeout - connection supervision timer (runs in BH context)
 * @data: the tipc socket whose sk_timer fired
 *
 * If the previous probe went unacknowledged the connection is torn down
 * (or the check retried shortly, if the socket is owned by user context);
 * otherwise a new CONN_PROBE is sent and the timer re-armed.  Drops the
 * timer's socket reference on exit.
 */
static void tipc_sk_timeout(unsigned long data)
{
	struct tipc_sock *tsk = (struct tipc_sock *)data;
	struct sock *sk = &tsk->sk;
	struct sk_buff *skb = NULL;
	u32 peer_port, peer_node;
	u32 own_node = tsk_own_node(tsk);

	bh_lock_sock(sk);
	if (!tipc_sk_connected(sk)) {
		bh_unlock_sock(sk);
		goto exit;
	}
	peer_port = tsk_peer_port(tsk);
	peer_node = tsk_peer_node(tsk);

	if (tsk->probe_unacked) {
		if (!sock_owned_by_user(sk)) {
			/* Peer unresponsive: sever the connection */
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
					      tsk_peer_port(tsk));
			sk->sk_state_change(sk);
		} else {
			/* Try again later */
			sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
		}

		bh_unlock_sock(sk);
		goto exit;
	}

	skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
			      INT_H_SIZE, 0, peer_node, own_node,
			      peer_port, tsk->portid, TIPC_OK);
	tsk->probe_unacked = true;
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
	bh_unlock_sock(sk);
	/* Transmit outside the BH lock */
	if (skb)
		tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
exit:
	/* Release the reference taken when the timer was armed */
	sock_put(sk);
}

2160
/* tipc_sk_publish - bind a name sequence to this socket
 * @tsk: socket to publish from (caller holds the socket lock)
 * @scope: publication scope (zone/cluster/node)
 * @seq: name sequence (type, lower, upper) to publish
 *
 * Returns 0 on success, -EINVAL if the socket is connected or the name
 * table rejects the publication, -EADDRINUSE when the per-socket key
 * space wraps around.
 */
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq const *seq)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct publication *publ;
	u32 key;

	if (tipc_sk_connected(sk))
		return -EINVAL;
	/* Key must be unique per port; equality means pub_count wrapped */
	key = tsk->portid + tsk->pub_count + 1;
	if (key == tsk->portid)
		return -EADDRINUSE;

	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
				    scope, tsk->portid, key);
	if (unlikely(!publ))
		return -EINVAL;

	list_add(&publ->pport_list, &tsk->publications);
	tsk->pub_count++;
	tsk->published = 1;
	return 0;
}

2185
/* tipc_sk_withdraw - withdraw name publication(s) from this socket
 * @tsk: socket owning the publications (caller holds the socket lock)
 * @scope: scope the publication must match (only used when @seq is set)
 * @seq: specific name sequence to withdraw, or NULL to withdraw all
 *
 * Returns 0 if at least one publication was withdrawn, -EINVAL otherwise.
 */
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq const *seq)
{
	struct net *net = sock_net(&tsk->sk);
	struct publication *publ;
	struct publication *safe;
	int rc = -EINVAL;

	list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
		if (seq) {
			if (publ->scope != scope)
				continue;
			if (publ->type != seq->type)
				continue;
			if (publ->lower != seq->lower)
				continue;
			/* Matching type/lower but mismatching upper: stop
			 * searching without withdrawing anything.
			 */
			if (publ->upper != seq->upper)
				break;
			tipc_nametbl_withdraw(net, publ->type, publ->lower,
					      publ->ref, publ->key);
			rc = 0;
			break;
		}
		/* seq == NULL: withdraw every publication on the socket */
		tipc_nametbl_withdraw(net, publ->type, publ->lower,
				      publ->ref, publ->key);
		rc = 0;
	}
	if (list_empty(&tsk->publications))
		tsk->published = 0;
	return rc;
}

2217 2218 2219
/* tipc_sk_reinit: set non-zero address in all existing sockets
 *                 when we go from standalone to network mode.
 *
 * Walks the whole socket hash table under RCU and rewrites the cached
 * message-header origin fields of every socket to the (newly assigned)
 * own node address, serialized per socket via its slock.
 */
void tipc_sk_reinit(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	const struct bucket_table *tbl;
	struct rhash_head *pos;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;
	int i;

	rcu_read_lock();
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
	for (i = 0; i < tbl->size; i++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, i, node) {
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			msg = &tsk->phdr;
			msg_set_prevnode(msg, tn->own_addr);
			msg_set_orignode(msg, tn->own_addr);
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
	}
	rcu_read_unlock();
}

2243
/* Look up a socket by port id; returns it with an extra reference held
 * (caller must sock_put()), or NULL if no such socket exists.
 */
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_sock *found;

	rcu_read_lock();
	found = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params);
	if (found)
		sock_hold(&found->sk);
	rcu_read_unlock();
	return found;
}

2257
/* tipc_sk_insert - assign a free port id and insert the socket into the
 * port hash table.
 *
 * Starts from a random port id and probes the whole range at most once,
 * wrapping at TIPC_MAX_PORT.  Returns 0 on success, -1 when the entire
 * port range is exhausted.
 */
static int tipc_sk_insert(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
	u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;

	while (remaining--) {
		portid++;
		if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT))
			portid = TIPC_MIN_PORT;
		tsk->portid = portid;
		/* Hold a table reference; dropped again if the slot is taken */
		sock_hold(&tsk->sk);
		if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
						   tsk_rht_params))
			return 0;
		sock_put(&tsk->sk);
	}

	return -1;
}

2280
/* tipc_sk_remove - remove the socket from the port hash table and drop
 * the reference the table held (taken in tipc_sk_insert()).
 */
static void tipc_sk_remove(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);

	if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
		/* The table's reference must not have been the last one */
		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
}

2291 2292 2293 2294 2295 2296 2297
/* Parameters of the portid -> tipc_sock resizable hash table */
static const struct rhashtable_params tsk_rht_params = {
	.nelem_hint = 192,
	.head_offset = offsetof(struct tipc_sock, node),
	.key_offset = offsetof(struct tipc_sock, portid),
	.key_len = sizeof(u32), /* portid */
	.max_size = 1048576,
	.min_size = 256,
	.automatic_shrinking = true,
};

2301
/* tipc_sk_rht_init - create the per-netns socket hash table */
int tipc_sk_rht_init(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
}

2308
/* tipc_sk_rht_destroy - tear down the per-netns socket hash table */
void tipc_sk_rht_destroy(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	/* Wait for socket readers to complete */
	synchronize_net();

	rhashtable_destroy(&tn->sk_rht);
}

P
Per Liden 已提交
2318
/**
 * tipc_setsockopt - set socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: pointer to new option value
 * @ol: length of option value
 *
 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, unsigned int ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	u32 value;
	int res;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return 0;
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
	if (ol < sizeof(value))
		return -EINVAL;
	/* res is 0 after a successful copy from user space */
	res = get_user(value, (u32 __user *)ov);
	if (res)
		return res;

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		res = tsk_set_importance(tsk, value);
		break;
	case TIPC_SRC_DROPPABLE:
		/* Stream sockets must stay reliable */
		if (sock->type != SOCK_STREAM)
			tsk_set_unreliable(tsk, value);
		else
			res = -ENOPROTOOPT;
		break;
	case TIPC_DEST_DROPPABLE:
		tsk_set_unreturnable(tsk, value);
		break;
	case TIPC_CONN_TIMEOUT:
		tipc_sk(sk)->conn_timeout = value;
		/* no need to set "res", since already 0 at this point */
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	return res;
}

/**
 * tipc_getsockopt - get socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: receptacle for option value
 * @ol: receptacle for length of option value
 *
 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, int __user *ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	int len;
	u32 value;
	int res;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return put_user(0, ol);
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
	/* res is 0 after a successful copy from user space */
	res = get_user(len, ol);
	if (res)
		return res;

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		value = tsk_importance(tsk);
		break;
	case TIPC_SRC_DROPPABLE:
		value = tsk_unreliable(tsk);
		break;
	case TIPC_DEST_DROPPABLE:
		value = tsk_unreturnable(tsk);
		break;
	case TIPC_CONN_TIMEOUT:
		value = tsk->conn_timeout;
		/* no need to set "res", since already 0 at this point */
		break;
	case TIPC_NODE_RECVQ_DEPTH:
		value = 0; /* was tipc_queue_size, now obsolete */
		break;
	case TIPC_SOCK_RECVQ_DEPTH:
		value = skb_queue_len(&sk->sk_receive_queue);
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	if (res)
		return res;	/* "get" failed */

	if (len < sizeof(value))
		return -EINVAL;

	if (copy_to_user(ov, &value, sizeof(value)))
		return -EFAULT;

	return put_user(sizeof(value), ol);
}

2447
/* tipc_ioctl - socket ioctl handler
 * @sock: socket structure
 * @cmd: ioctl command (only SIOCGETLINKNAME is supported)
 * @arg: user-space pointer to a struct tipc_sioc_ln_req
 *
 * Returns 0 on success, -EFAULT on bad user memory, -EADDRNOTAVAIL if
 * the link is unknown, -ENOIOCTLCMD for unsupported commands.
 */
static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	struct tipc_sioc_ln_req lnr;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case SIOCGETLINKNAME:
		if (copy_from_user(&lnr, argp, sizeof(lnr)))
			return -EFAULT;
		/* bearer_id is masked to its valid 16-bit range */
		if (!tipc_node_get_linkname(sock_net(sk),
					    lnr.bearer_id & 0xffff, lnr.peer,
					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
			if (copy_to_user(argp, &lnr, sizeof(lnr)))
				return -EFAULT;
			return 0;
		}
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

2470 2471
/* Protocol switches for the various types of TIPC sockets */

2472
/* Ops for connectionless SOCK_RDM/SOCK_DGRAM sockets: no accept/listen */
static const struct proto_ops msg_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendmsg,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

2493
/* Ops for connection-oriented, message-preserving SOCK_SEQPACKET sockets */
static const struct proto_ops packet_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_packet,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

2514
/* Ops for connection-oriented, byte-stream SOCK_STREAM sockets */
static const struct proto_ops stream_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendstream,
	.recvmsg	= tipc_recv_stream,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

2535
/* Address-family registration: routes socket(AF_TIPC, ...) to tipc_sk_create */
static const struct net_proto_family tipc_family_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.create		= tipc_sk_create
};

/* Protocol definition: per-socket allocation size and rmem sysctl limits */
static struct proto tipc_proto = {
	.name		= "TIPC",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct tipc_sock),
	.sysctl_rmem	= sysctl_tipc_rmem
};

/**
2549
 * tipc_socket_init - initialize TIPC socket interface
2550
 *
P
Per Liden 已提交
2551 2552
 * Returns 0 on success, errno otherwise
 */
2553
int tipc_socket_init(void)
P
Per Liden 已提交
2554 2555 2556
{
	int res;

2557
	res = proto_register(&tipc_proto, 1);
P
Per Liden 已提交
2558
	if (res) {
2559
		pr_err("Failed to register TIPC protocol type\n");
P
Per Liden 已提交
2560 2561 2562 2563 2564
		goto out;
	}

	res = sock_register(&tipc_family_ops);
	if (res) {
2565
		pr_err("Failed to register TIPC socket type\n");
P
Per Liden 已提交
2566 2567 2568 2569 2570 2571 2572 2573
		proto_unregister(&tipc_proto);
		goto out;
	}
 out:
	return res;
}

/**
 * tipc_socket_stop - stop TIPC socket interface
 *
 * Unregisters in the reverse order of tipc_socket_init().
 */
void tipc_socket_stop(void)
{
	sock_unregister(tipc_family_ops.family);
	proto_unregister(&tipc_proto);
}
2581 2582

/* Caller should hold socket lock for the passed tipc socket. */
2583
static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617
{
	u32 peer_node;
	u32 peer_port;
	struct nlattr *nest;

	peer_node = tsk_peer_node(tsk);
	peer_port = tsk_peer_port(tsk);

	nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);

	if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
		goto msg_full;
	if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
		goto msg_full;

	if (tsk->conn_type != 0) {
		if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
			goto msg_full;
	}
	nla_nest_end(skb, nest);

	return 0;

msg_full:
	nla_nest_cancel(skb, nest);

	return -EMSGSIZE;
}

/* __tipc_nl_add_sk - emit one TIPC_NLA_SOCK record for a socket into a
 * netlink dump message: port reference, own node address, and either the
 * connection nest (connected sockets) or a has-publications flag.
 *
 * Caller should hold socket lock for the passed tipc socket.
 * Returns 0 on success, -EMSGSIZE when the skb is full (all partially
 * written attributes/headers are cancelled).
 */
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tipc_sock *tsk)
{
	int err;
	void *hdr;
	struct nlattr *attrs;
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct sock *sk = &tsk->sk;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto genlmsg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
		goto attr_msg_cancel;

	if (tipc_sk_connected(sk)) {
		err = __tipc_nl_add_sk_con(skb, tsk);
		if (err)
			goto attr_msg_cancel;
	} else if (!list_empty(&tsk->publications)) {
		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
			goto attr_msg_cancel;
	}
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

/* tipc_nl_sk_dump - netlink dump callback listing all TIPC sockets.
 *
 * Walks the socket hash table under RCU; cb->args[0] (bucket index) and
 * cb->args[1] (port id of the socket that did not fit) are resume
 * cookies for the next invocation of the dump.
 */
int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	struct tipc_sock *tsk;
	const struct bucket_table *tbl;
	struct rhash_head *pos;
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 tbl_id = cb->args[0];
	u32 prev_portid = cb->args[1];

	rcu_read_lock();
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
	for (; tbl_id < tbl->size; tbl_id++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) {
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			/* Skip entries until we reach the resume point */
			if (prev_portid && prev_portid != tsk->portid) {
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				continue;
			}

			err = __tipc_nl_add_sk(skb, cb, tsk);
			if (err) {
				/* skb full: remember where to resume */
				prev_portid = tsk->portid;
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				goto out;
			}
			prev_portid = 0;
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
	}
out:
	rcu_read_unlock();
	cb->args[0] = tbl_id;
	cb->args[1] = prev_portid;

	return skb->len;
}
2700 2701

/* __tipc_nl_add_sk_publ - emit one TIPC_NLA_PUBL record (key, type,
 * lower, upper) for a publication into a netlink dump message.
 *
 * Caller should hold socket lock for the passed tipc socket.
 * Returns 0 on success, -EMSGSIZE when the skb is full.
 */
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 struct publication *publ)
{
	void *hdr;
	struct nlattr *attrs;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET);
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_PUBL);
	if (!attrs)
		goto genlmsg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

/* __tipc_nl_list_sk_publ - dump the socket's publications, resuming after
 * the publication whose key is *last_publ (0 means start from the head).
 * On a full skb, *last_publ is set to the key of the publication that did
 * not fit, for the next dump invocation to resume from.
 *
 * Caller should hold socket lock for the passed tipc socket.
 * Returns 0 when the list was fully dumped, -EPIPE when the resume key
 * vanished, or the error from __tipc_nl_add_sk_publ().
 */
static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
				  struct netlink_callback *cb,
				  struct tipc_sock *tsk, u32 *last_publ)
{
	int err;
	struct publication *p;

	if (*last_publ) {
		/* Re-find the publication we stopped at last time; it may
		 * have been withdrawn in the meantime.
		 */
		list_for_each_entry(p, &tsk->publications, pport_list) {
			if (p->key == *last_publ)
				break;
		}
		if (p->key != *last_publ) {
			/* We never set seq or call nl_dump_check_consistent()
			 * this means that setting prev_seq here will cause the
			 * consistence check to fail in the netlink callback
			 * handler. Resulting in the last NLMSG_DONE message
			 * having the NLM_F_DUMP_INTR flag set.
			 */
			cb->prev_seq = 1;
			*last_publ = 0;
			return -EPIPE;
		}
	} else {
		p = list_first_entry(&tsk->publications, struct publication,
				     pport_list);
	}

	list_for_each_entry_from(p, &tsk->publications, pport_list) {
		err = __tipc_nl_add_sk_publ(skb, cb, p);
		if (err) {
			*last_publ = p->key;
			return err;
		}
	}
	*last_publ = 0;

	return 0;
}

/* tipc_nl_publ_dump - netlink dump callback listing one socket's
 * publications.  On the first invocation the target port is parsed from
 * the request's TIPC_NLA_SOCK_REF attribute; cb->args[] carry the port
 * id, resume key, and completion flag across invocations.
 */
int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	u32 tsk_portid = cb->args[0];
	u32 last_publ = cb->args[1];
	u32 done = cb->args[2];
	struct net *net = sock_net(skb->sk);
	struct tipc_sock *tsk;

	if (!tsk_portid) {
		/* First pass: extract the target socket from the request */
		struct nlattr **attrs;
		struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];

		err = tipc_nlmsg_parse(cb->nlh, &attrs);
		if (err)
			return err;

		if (!attrs[TIPC_NLA_SOCK])
			return -EINVAL;

		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
				       attrs[TIPC_NLA_SOCK],
				       tipc_nl_sock_policy);
		if (err)
			return err;

		if (!sock[TIPC_NLA_SOCK_REF])
			return -EINVAL;

		tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]);
	}

	if (done)
		return 0;

	/* Takes a reference on the socket; released below */
	tsk = tipc_sk_lookup(net, tsk_portid);
	if (!tsk)
		return -EINVAL;

	lock_sock(&tsk->sk);
	err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ);
	if (!err)
		done = 1;
	release_sock(&tsk->sk);
	sock_put(&tsk->sk);

	cb->args[0] = tsk_portid;
	cb->args[1] = last_publ;
	cb->args[2] = done;

	return skb->len;
}