/*
 * net/tipc/socket.c: TIPC socket API
 *
 * Copyright (c) 2001-2007, 2012-2016, Ericsson AB
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/rhashtable.h>
#include "core.h"
#include "name_table.h"
#include "node.h"
#include "link.h"
#include "name_distr.h"
#include "socket.h"
#include "bcast.h"
#include "netlink.h"

#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
#define CONN_PROBING_INTERVAL	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
#define TIPC_FWD_MSG		1
#define TIPC_MAX_PORT		0xffffffff
#define TIPC_MIN_PORT		1

enum {
	TIPC_LISTEN = TCP_LISTEN,
	TIPC_ESTABLISHED = TCP_ESTABLISHED,
	TIPC_OPEN = TCP_CLOSE,
	TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
	TIPC_CONNECTING = TCP_SYN_SENT,
};

/**
 * struct tipc_sock - TIPC socket structure
 * @sk: socket - interacts with 'port' and with user via the socket API
 * @conn_type: TIPC type used when connection was established
 * @conn_instance: TIPC instance used when connection was established
 * @published: non-zero if port has one or more associated names
 * @max_pkt: maximum packet size "hint" used when building messages sent by port
 * @portid: unique port identity in TIPC socket hash table
 * @phdr: preformatted message header used when sending messages
 * @cong_links: list of congested links
 * @publications: list of publications for port
 * @pub_count: total # of publications port has made during its lifetime
 * @probe_unacked: last connection probe has not yet been acknowledged by peer
 * @conn_timeout: the time we can wait for an unresponded setup request
 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
 * @cong_link_cnt: number of congested links
 * @snt_unacked: # messages sent by socket, and not yet acked by peer
 * @rcv_unacked: # messages read by user, but not yet acked back to peer
 * @peer: 'connected' peer for dgram/rdm
 * @node: hash table node
 * @rcu: rcu struct for tipc_sock
 */
struct tipc_sock {
	struct sock sk;
	u32 conn_type;
	u32 conn_instance;
	int published;
	u32 max_pkt;
	u32 portid;
	struct tipc_msg phdr;
	struct list_head cong_links;
	struct list_head publications;
	u32 pub_count;
	uint conn_timeout;
	atomic_t dupl_rcvcnt;
	bool probe_unacked;
	u16 cong_link_cnt;
	u16 snt_unacked;
	u16 snd_win;
	u16 peer_caps;
	u16 rcv_unacked;
	u16 rcv_win;
	struct sockaddr_tipc peer;
	struct rhash_head node;
	struct rcu_head rcu;
};

static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
static void tipc_sk_timeout(unsigned long data);
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq const *seq);
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq const *seq);
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);

static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
static struct proto tipc_proto;
static const struct rhashtable_params tsk_rht_params;

static u32 tsk_own_node(struct tipc_sock *tsk)
{
	return msg_prevnode(&tsk->phdr);
}

static u32 tsk_peer_node(struct tipc_sock *tsk)
{
	return msg_destnode(&tsk->phdr);
}

static u32 tsk_peer_port(struct tipc_sock *tsk)
{
	return msg_destport(&tsk->phdr);
}

static bool tsk_unreliable(struct tipc_sock *tsk)
{
	return msg_src_droppable(&tsk->phdr) != 0;
}

static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
{
	msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0);
}

static bool tsk_unreturnable(struct tipc_sock *tsk)
{
	return msg_dest_droppable(&tsk->phdr) != 0;
}

static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
{
	msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0);
}

static int tsk_importance(struct tipc_sock *tsk)
{
	return msg_importance(&tsk->phdr);
}

static int tsk_set_importance(struct tipc_sock *tsk, int imp)
{
	if (imp > TIPC_CRITICAL_IMPORTANCE)
		return -EINVAL;
	msg_set_importance(&tsk->phdr, (u32)imp);
	return 0;
}

static struct tipc_sock *tipc_sk(const struct sock *sk)
{
	return container_of(sk, struct tipc_sock, sk);
}

static bool tsk_conn_cong(struct tipc_sock *tsk)
{
	return tsk->snt_unacked > tsk->snd_win;
}

/* tsk_blocks(): translate a buffer size in bytes to number of
 * advertisable blocks, taking into account the ratio truesize(len)/len
 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
 */
static u16 tsk_adv_blocks(int len)
{
	return len / FLOWCTL_BLK_SZ / 4;
}

/* tsk_inc(): increment counter for sent or received data
 * - If block based flow control is not supported by peer we
 *   fall back to message based ditto, incrementing the counter
 */
static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
{
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return ((msglen / FLOWCTL_BLK_SZ) + 1);
	return 1;
}
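
/* Worked example (illustrative; assumes FLOWCTL_BLK_SZ is 1024 bytes):
 * a 2 MB receive buffer advertises tsk_adv_blocks(2097152) =
 * 2097152 / 1024 / 4 = 512 blocks, while a 5000 byte message sent to a
 * peer supporting TIPC_BLOCK_FLOWCTL costs tsk_inc(tsk, 5000) =
 * (5000 / 1024) + 1 = 5 blocks. Towards a legacy peer the same message
 * counts as a single unit.
 */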

/**
211
 * tsk_advance_rx_queue - discard first buffer in socket receive queue
212 213
 *
 * Caller must hold socket lock
214
 */
215
static void tsk_advance_rx_queue(struct sock *sk)
216
{
217
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
218 219
}

220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
/* tipc_sk_respond() : send response message back to sender
 */
static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
{
	u32 selector;
	u32 dnode;
	u32 onode = tipc_own_addr(sock_net(sk));

	if (!tipc_msg_reverse(onode, &skb, err))
		return;

	dnode = msg_destnode(buf_msg(skb));
	selector = msg_origport(buf_msg(skb));
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
}

236
/**
237
 * tsk_rej_rx_queue - reject all buffers in socket receive queue
238 239
 *
 * Caller must hold socket lock
240
 */
241
static void tsk_rej_rx_queue(struct sock *sk)
242
{
243
	struct sk_buff *skb;
244

245 246
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
		tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
247 248
}

249 250
static bool tipc_sk_connected(struct sock *sk)
{
251
	return sk->sk_state == TIPC_ESTABLISHED;
252 253
}

254 255 256 257 258 259 260 261 262 263
/* tipc_sk_type_connectionless - check if the socket is datagram socket
 * @sk: socket
 *
 * Returns true if connectionless, false otherwise
 */
static bool tipc_sk_type_connectionless(struct sock *sk)
{
	return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM;
}

264
/* tsk_peer_msg - verify if message was sent by connected port's peer
265 266 267 268
 *
 * Handles cases where the node's network address has changed from
 * the default of <0.0.0> to its configured setting.
 */
269
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
270
{
271 272
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
273
	u32 peer_port = tsk_peer_port(tsk);
274 275 276
	u32 orig_node;
	u32 peer_node;

277
	if (unlikely(!tipc_sk_connected(sk)))
278 279 280 281 282 283
		return false;

	if (unlikely(msg_origport(msg) != peer_port))
		return false;

	orig_node = msg_orignode(msg);
284
	peer_node = tsk_peer_node(tsk);
285 286 287 288

	if (likely(orig_node == peer_node))
		return true;

289
	if (!orig_node && (peer_node == tn->own_addr))
290 291
		return true;

292
	if (!peer_node && (orig_node == tn->own_addr))
293 294 295 296 297
		return true;

	return false;
}

298 299 300 301 302 303 304 305 306
/* tipc_set_sk_state - set the sk_state of the socket
 * @sk: socket
 *
 * Caller must hold socket lock
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_set_sk_state(struct sock *sk, int state)
{
307
	int oldsk_state = sk->sk_state;
308 309 310
	int res = -EINVAL;

	switch (state) {
311 312 313
	case TIPC_OPEN:
		res = 0;
		break;
314
	case TIPC_LISTEN:
315
	case TIPC_CONNECTING:
316
		if (oldsk_state == TIPC_OPEN)
317 318
			res = 0;
		break;
319
	case TIPC_ESTABLISHED:
320
		if (oldsk_state == TIPC_CONNECTING ||
321
		    oldsk_state == TIPC_OPEN)
322 323
			res = 0;
		break;
324
	case TIPC_DISCONNECTING:
325
		if (oldsk_state == TIPC_CONNECTING ||
326 327 328
		    oldsk_state == TIPC_ESTABLISHED)
			res = 0;
		break;
329 330 331 332 333 334 335 336
	}

	if (!res)
		sk->sk_state = state;

	return res;
}
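
/* Transitions permitted by the checks above (anything else returns -EINVAL):
 *
 *	TIPC_OPEN          <- any state
 *	TIPC_LISTEN        <- TIPC_OPEN
 *	TIPC_CONNECTING    <- TIPC_OPEN
 *	TIPC_ESTABLISHED   <- TIPC_CONNECTING, TIPC_OPEN
 *	TIPC_DISCONNECTING <- TIPC_CONNECTING, TIPC_ESTABLISHED
 */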

static int tipc_sk_sock_err(struct socket *sock, long *timeout)
{
	struct sock *sk = sock->sk;
	int err = sock_error(sk);
	int typ = sock->type;

	if (err)
		return err;
	if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) {
		if (sk->sk_state == TIPC_DISCONNECTING)
			return -EPIPE;
		else if (!tipc_sk_connected(sk))
			return -ENOTCONN;
	}
	if (!*timeout)
		return -EAGAIN;
	if (signal_pending(current))
		return sock_intr_errno(*timeout);

	return 0;
}

#define tipc_wait_for_cond(sock_, timeout_, condition_)			\
({								        \
	int rc_ = 0;							\
	int done_ = 0;							\
									\
	while (!(condition_) && !done_) {				\
		struct sock *sk_ = sock->sk;				\
		DEFINE_WAIT_FUNC(wait_, woken_wake_function);		\
									\
		rc_ = tipc_sk_sock_err(sock_, timeout_);		\
		if (rc_)						\
			break;						\
		prepare_to_wait(sk_sleep(sk_), &wait_,			\
				TASK_INTERRUPTIBLE);			\
		done_ = sk_wait_event(sk_, timeout_,			\
				      (condition_), &wait_);		\
		remove_wait_queue(sk_sleep(sk_), &wait_);		\
	}								\
	rc_;								\
})
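
/* Typical use, as in the transmit paths below: sleep until no destination
 * link is congested, or until the timeout expires, a signal arrives or a
 * socket error is raised:
 *
 *	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
 *	if (unlikely(rc))
 *		return rc;
 */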

380
/**
381
 * tipc_sk_create - create a TIPC socket
382
 * @net: network namespace (must be default network)
383 384
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
385
 * @kern: caused by kernel or by userspace?
386
 *
387 388
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
389 390 391
 *
 * Returns 0 on success, errno otherwise
 */
392 393
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
394
{
395
	struct tipc_net *tn;
396
	const struct proto_ops *ops;
397
	struct sock *sk;
398
	struct tipc_sock *tsk;
399
	struct tipc_msg *msg;
400 401

	/* Validate arguments */
402 403 404 405 406
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	switch (sock->type) {
	case SOCK_STREAM:
407
		ops = &stream_ops;
408 409
		break;
	case SOCK_SEQPACKET:
410
		ops = &packet_ops;
411 412 413
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
414
		ops = &msg_ops;
415
		break;
416 417
	default:
		return -EPROTOTYPE;
418 419
	}

420
	/* Allocate socket's protocol area */
421
	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
422
	if (sk == NULL)
423 424
		return -ENOMEM;

425
	tsk = tipc_sk(sk);
426 427
	tsk->max_pkt = MAX_PKT_DEFAULT;
	INIT_LIST_HEAD(&tsk->publications);
428
	INIT_LIST_HEAD(&tsk->cong_links);
429
	msg = &tsk->phdr;
430 431
	tn = net_generic(sock_net(sk), tipc_net_id);
	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
432
		      NAMED_H_SIZE, 0);
433

434 435 436
	/* Finish initializing socket data structures */
	sock->ops = ops;
	sock_init_data(sock, sk);
437
	tipc_set_sk_state(sk, TIPC_OPEN);
438
	if (tipc_sk_insert(tsk)) {
439
		pr_warn("Socket create failed; port number exhausted\n");
440 441 442
		return -EINVAL;
	}
	msg_set_origport(msg, tsk->portid);
443
	setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
444
	sk->sk_shutdown = 0;
445
	sk->sk_backlog_rcv = tipc_backlog_rcv;
446
	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
447 448
	sk->sk_data_ready = tipc_data_ready;
	sk->sk_write_space = tipc_write_space;
449
	sk->sk_destruct = tipc_sock_destruct;
450 451
	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
	atomic_set(&tsk->dupl_rcvcnt, 0);
452

453 454 455 456
	/* Start out with safe limits until we receive an advertised window */
	tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
	tsk->rcv_win = tsk->snd_win;

457
	if (tipc_sk_type_connectionless(sk)) {
458
		tsk_set_unreturnable(tsk, true);
459
		if (sock->type == SOCK_DGRAM)
460
			tsk_set_unreliable(tsk, true);
461
	}
462

463 464 465
	return 0;
}
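
/* A user space counterpart reaching this function would be, e.g.:
 *
 *	int sd = socket(AF_TIPC, SOCK_RDM, 0);
 *
 * SOCK_RDM and SOCK_DGRAM select msg_ops above, SOCK_SEQPACKET selects
 * packet_ops, and SOCK_STREAM selects stream_ops.
 */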

static void tipc_sk_callback(struct rcu_head *head)
{
	struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);

	sock_put(&tsk->sk);
}

473 474 475 476 477 478
/* Caller should hold socket lock for the socket. */
static void __tipc_shutdown(struct socket *sock, int error)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
479
	long timeout = CONN_TIMEOUT_DEFAULT;
480 481 482
	u32 dnode = tsk_peer_node(tsk);
	struct sk_buff *skb;

483 484 485 486
	/* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */
	tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
					    !tsk_conn_cong(tsk)));

487 488 489 490 491 492
	/* Reject all unreceived messages, except on an active connection
	 * (which disconnects locally & sends a 'FIN+' to peer).
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (TIPC_SKB_CB(skb)->bytes_read) {
			kfree_skb(skb);
493
			continue;
494
		}
495 496 497 498 499 500
		if (!tipc_sk_type_connectionless(sk) &&
		    sk->sk_state != TIPC_DISCONNECTING) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			tipc_node_remove_conn(net, dnode, tsk->portid);
		}
		tipc_sk_respond(sk, skb, error);
501
	}
502 503 504 505

	if (tipc_sk_type_connectionless(sk))
		return;

506 507 508 509 510 511 512
	if (sk->sk_state != TIPC_DISCONNECTING) {
		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
				      TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
				      tsk_own_node(tsk), tsk_peer_port(tsk),
				      tsk->portid, error);
		if (skb)
			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
513 514
		tipc_node_remove_conn(net, dnode, tsk->portid);
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
515 516 517
	}
}

518
/**
519
 * tipc_release - destroy a TIPC socket
520 521 522 523 524 525 526
 * @sock: socket to destroy
 *
 * This routine cleans up any messages that are still queued on the socket.
 * For DGRAM and RDM socket types, all queued messages are rejected.
 * For SEQPACKET and STREAM socket types, the first message is rejected
 * and any others are discarded.  (If the first message on a STREAM socket
 * is partially-read, it is discarded and the next one is rejected instead.)
527
 *
528 529 530 531 532 533
 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 * are returned or discarded according to the "destination droppable" setting
 * specified for the message by the sender.
 *
 * Returns 0 on success, errno otherwise
 */
534
static int tipc_release(struct socket *sock)
535 536
{
	struct sock *sk = sock->sk;
537
	struct tipc_sock *tsk;
538

539 540 541 542 543
	/*
	 * Exit if socket isn't fully initialized (occurs when a failed accept()
	 * releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
544
		return 0;
545

546
	tsk = tipc_sk(sk);
547 548
	lock_sock(sk);

549 550
	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
	sk->sk_shutdown = SHUTDOWN_MASK;
551
	tipc_sk_withdraw(tsk, 0, NULL);
552
	sk_stop_timer(sk, &sk->sk_timer);
553
	tipc_sk_remove(tsk);
554

555 556
	/* Reject any messages that accumulated in backlog queue */
	release_sock(sk);
557 558
	u32_list_purge(&tsk->cong_links);
	tsk->cong_link_cnt = 0;
559
	call_rcu(&tsk->rcu, tipc_sk_callback);
560
	sock->sk = NULL;
561

562
	return 0;
563 564 565
}

/**
566
 * tipc_bind - associate or disassociate TIPC name(s) with a socket
567 568 569
 * @sock: socket structure
 * @uaddr: socket address describing name(s) and desired operation
 * @uaddr_len: size of socket address data structure
570
 *
571 572 573
 * Name and name sequence binding is indicated using a positive scope value;
 * a negative scope value unbinds the specified name.  Specifying no name
 * (i.e. a socket address length of 0) unbinds all names from the socket.
574
 *
575
 * Returns 0 on success, errno otherwise
576 577 578
 *
 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 *       access any non-constant socket information.
579
 */
580 581
static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
		     int uaddr_len)
582
{
583
	struct sock *sk = sock->sk;
584
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
585
	struct tipc_sock *tsk = tipc_sk(sk);
586
	int res = -EINVAL;
587

588 589
	lock_sock(sk);
	if (unlikely(!uaddr_len)) {
590
		res = tipc_sk_withdraw(tsk, 0, NULL);
591 592
		goto exit;
	}
593

594 595 596 597 598 599 600 601
	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
		res = -EINVAL;
		goto exit;
	}
	if (addr->family != AF_TIPC) {
		res = -EAFNOSUPPORT;
		goto exit;
	}
602 603 604

	if (addr->addrtype == TIPC_ADDR_NAME)
		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
605 606 607 608
	else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
		res = -EAFNOSUPPORT;
		goto exit;
	}
609

610
	if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
611
	    (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
612 613 614 615
	    (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
		res = -EACCES;
		goto exit;
	}
616

617
	res = (addr->scope > 0) ?
618 619
		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
620 621 622
exit:
	release_sock(sk);
	return res;
623 624
}

625
/**
626
 * tipc_getname - get port ID of socket or peer socket
627 628 629
 * @sock: socket structure
 * @uaddr: area for returned socket address
 * @uaddr_len: area for returned length of socket address
630
 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
631
 *
632
 * Returns 0 on success, errno otherwise
633
 *
634 635
 * NOTE: This routine doesn't need to take the socket lock since it only
 *       accesses socket information that is unchanging (or which changes in
636
 *       a completely predictable manner).
637
 */
638 639
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
			int *uaddr_len, int peer)
640 641
{
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
642 643
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
644
	struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
645

646
	memset(addr, 0, sizeof(*addr));
647
	if (peer) {
648
		if ((!tipc_sk_connected(sk)) &&
649
		    ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
650
			return -ENOTCONN;
651 652
		addr->addr.id.ref = tsk_peer_port(tsk);
		addr->addr.id.node = tsk_peer_node(tsk);
653
	} else {
654
		addr->addr.id.ref = tsk->portid;
655
		addr->addr.id.node = tn->own_addr;
656
	}
657 658 659 660 661 662 663

	*uaddr_len = sizeof(*addr);
	addr->addrtype = TIPC_ADDR_ID;
	addr->family = AF_TIPC;
	addr->scope = 0;
	addr->addr.name.domain = 0;

664
	return 0;
665 666 667
}

/**
668
 * tipc_poll - read and possibly block on pollmask
669 670 671 672
 * @file: file structure associated with the socket
 * @sock: socket for which to calculate the poll bits
 * @wait: poll table
 *
673 674 675 676 677 678 679 680
 * Returns pollmask value
 *
 * COMMENTARY:
 * It appears that the usual socket locking mechanisms are not useful here
 * since the pollmask info is potentially out-of-date the moment this routine
 * exits.  TCP and other protocols seem to rely on higher level poll routines
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
681 682 683
 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 * imply that the operation will succeed, merely that it should be performed
 * and will not block.
684
 */
685 686
static unsigned int tipc_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
687
{
688
	struct sock *sk = sock->sk;
689
	struct tipc_sock *tsk = tipc_sk(sk);
690
	u32 mask = 0;
691

692
	sock_poll_wait(file, sk_sleep(sk), wait);
693

694 695 696 697 698
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

699 700
	switch (sk->sk_state) {
	case TIPC_ESTABLISHED:
701
		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
702
			mask |= POLLOUT;
703 704 705
		/* fall thru' */
	case TIPC_LISTEN:
	case TIPC_CONNECTING:
706 707
		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= (POLLIN | POLLRDNORM);
708 709
		break;
	case TIPC_OPEN:
710
		if (!tsk->cong_link_cnt)
711 712 713 714 715 716 717 718
			mask |= POLLOUT;
		if (tipc_sk_type_connectionless(sk) &&
		    (!skb_queue_empty(&sk->sk_receive_queue)))
			mask |= (POLLIN | POLLRDNORM);
		break;
	case TIPC_DISCONNECTING:
		mask = (POLLIN | POLLRDNORM | POLLHUP);
		break;
719
	}
720 721

	return mask;
722 723
}

724 725 726 727
/**
 * tipc_sendmcast - send multicast message
 * @sock: socket structure
 * @seq: destination address
728
 * @msg: message to send
729 730
 * @dlen: length of data to send
 * @timeout: timeout to wait for wakeup
731 732 733 734 735
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
736
			  struct msghdr *msg, size_t dlen, long timeout)
737 738
{
	struct sock *sk = sock->sk;
739
	struct tipc_sock *tsk = tipc_sk(sk);
740
	struct tipc_msg *hdr = &tsk->phdr;
741
	struct net *net = sock_net(sk);
742
	int mtu = tipc_bcast_get_mtu(net);
743
	u32 domain = addr_domain(net, TIPC_CLUSTER_SCOPE);
744
	struct sk_buff_head pkts;
745
	struct tipc_nlist dsts;
746 747
	int rc;

748
	/* Block or return if any destination link is congested */
749 750 751
	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;
752

753 754 755 756 757 758 759 760
	/* Lookup destination nodes */
	tipc_nlist_init(&dsts, tipc_own_addr(net));
	tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
				      seq->upper, domain, &dsts);
	if (!dsts.local && !dsts.remote)
		return -EHOSTUNREACH;

	/* Build message header */
761
	msg_set_type(hdr, TIPC_MCAST_MSG);
762
	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
763 764 765 766 767 768 769
	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_nametype(hdr, seq->type);
	msg_set_namelower(hdr, seq->lower);
	msg_set_nameupper(hdr, seq->upper);

770
	/* Build message as chain of buffers */
771 772
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);
773

774 775 776 777 778 779
	/* Send message if build was successful */
	if (unlikely(rc == dlen))
		rc = tipc_mcast_xmit(net, &pkts, &dsts,
				     &tsk->cong_link_cnt);

	tipc_nlist_purge(&dsts);
780 781

	return rc ? rc : dlen;
782 783
}

784 785 786 787 788 789
/**
 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
 * @arrvq: queue with arriving messages, to be cloned after destination lookup
 * @inputq: queue with cloned messages, delivered to socket after dest lookup
 *
 * Multi-threaded: parallel calls with reference to same queues may occur
790
 */
791 792
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
		       struct sk_buff_head *inputq)
793
{
794
	struct tipc_msg *msg;
795
	struct list_head dports;
796
	u32 portid;
797
	u32 scope = TIPC_CLUSTER_SCOPE;
798 799 800
	struct sk_buff_head tmpq;
	uint hsz;
	struct sk_buff *skb, *_skb;
801

802
	__skb_queue_head_init(&tmpq);
803
	INIT_LIST_HEAD(&dports);
804

805 806 807 808 809 810 811 812 813 814 815 816
	skb = tipc_skb_peek(arrvq, &inputq->lock);
	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
		msg = buf_msg(skb);
		hsz = skb_headroom(skb) + msg_hdr_sz(msg);

		if (in_own_node(net, msg_orignode(msg)))
			scope = TIPC_NODE_SCOPE;

		/* Create destination port list and message clones: */
		tipc_nametbl_mc_translate(net,
					  msg_nametype(msg), msg_namelower(msg),
					  msg_nameupper(msg), scope, &dports);
817 818
		portid = u32_pop(&dports);
		for (; portid; portid = u32_pop(&dports)) {
819 820 821 822 823 824 825
			_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
			if (_skb) {
				msg_set_destport(buf_msg(_skb), portid);
				__skb_queue_tail(&tmpq, _skb);
				continue;
			}
			pr_warn("Failed to clone mcast rcv buffer\n");
826
		}
827 828 829 830 831 832 833 834 835
		/* Append to inputq if not already done by other thread */
		spin_lock_bh(&inputq->lock);
		if (skb_peek(arrvq) == skb) {
			skb_queue_splice_tail_init(&tmpq, inputq);
			kfree_skb(__skb_dequeue(arrvq));
		}
		spin_unlock_bh(&inputq->lock);
		__skb_queue_purge(&tmpq);
		kfree_skb(skb);
836
	}
837
	tipc_sk_rcv(net, inputq);
838 839
}

840 841 842
/**
 * tipc_sk_proto_rcv - receive a connection mng protocol message
 * @tsk: receiving socket
843
 * @skb: pointer to message buffer.
844
 */
J
static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
			      struct sk_buff_head *xmitq)
847
{
848
	struct sock *sk = &tsk->sk;
J
	u32 onode = tsk_own_node(tsk);
850 851
	struct tipc_msg *hdr = buf_msg(skb);
	int mtyp = msg_type(hdr);
852
	bool conn_cong;
853

854
	/* Ignore if connection cannot be validated: */
855
	if (!tsk_peer_msg(tsk, hdr))
856 857
		goto exit;

858
	tsk->probe_unacked = false;
859

860 861
	if (mtyp == CONN_PROBE) {
		msg_set_type(hdr, CONN_PROBE_REPLY);
J
		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
			__skb_queue_tail(xmitq, skb);
864 865
		return;
	} else if (mtyp == CONN_ACK) {
866
		conn_cong = tsk_conn_cong(tsk);
867 868 869
		tsk->snt_unacked -= msg_conn_ack(hdr);
		if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
			tsk->snd_win = msg_adv_win(hdr);
870
		if (conn_cong)
871 872 873
			sk->sk_write_space(sk);
	} else if (mtyp != CONN_PROBE_REPLY) {
		pr_warn("Received unknown CONN_PROTO msg\n");
874 875
	}
exit:
876
	kfree_skb(skb);
877 878
}

P
/**
880
 * tipc_sendmsg - send message in connectionless manner
P
 * @sock: socket structure
 * @m: message to send
883
 * @dsz: amount of user data to be sent
884
 *
P
 * Message must have an destination specified explicitly.
886
 * Used for SOCK_RDM and SOCK_DGRAM messages,
P
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
889
 *
P
 * Returns the number of bytes sent on success, or errno otherwise
 */
892
static int tipc_sendmsg(struct socket *sock,
893
			struct msghdr *m, size_t dsz)
894 895 896 897 898 899 900 901 902 903 904
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_sendmsg(sock, m, dsz);
	release_sock(sk);

	return ret;
}

905
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
906
{
907
	struct sock *sk = sock->sk;
908
	struct net *net = sock_net(sk);
909 910 911 912 913 914
	struct tipc_sock *tsk = tipc_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct list_head *clinks = &tsk->cong_links;
	bool syn = !tipc_sk_type_connectionless(sk);
	struct tipc_msg *hdr = &tsk->phdr;
915
	struct tipc_name_seq *seq;
916 917 918 919
	struct sk_buff_head pkts;
	u32 type, inst, domain;
	u32 dnode, dport;
	int mtu, rc;
P

921
	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
922
		return -EMSGSIZE;
923

924
	if (unlikely(!dest)) {
925 926
		dest = &tsk->peer;
		if (!syn || dest->family != AF_TIPC)
927 928
			return -EDESTADDRREQ;
	}
929 930 931 932 933 934 935 936

	if (unlikely(m->msg_namelen < sizeof(*dest)))
		return -EINVAL;

	if (unlikely(dest->family != AF_TIPC))
		return -EINVAL;

	if (unlikely(syn)) {
937
		if (sk->sk_state == TIPC_LISTEN)
938
			return -EPIPE;
939
		if (sk->sk_state != TIPC_OPEN)
940 941 942
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
943
		if (dest->addrtype == TIPC_ADDR_NAME) {
944 945
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
946
		}
947
	}
948

949 950 951
	seq = &dest->addr.nameseq;
	if (dest->addrtype == TIPC_ADDR_MCAST)
		return tipc_sendmcast(sock, seq, m, dlen, timeout);
952

953 954 955 956
	if (dest->addrtype == TIPC_ADDR_NAME) {
		type = dest->addr.name.name.type;
		inst = dest->addr.name.name.instance;
		domain = dest->addr.name.domain;
957
		dnode = domain;
958 959 960 961 962
		msg_set_type(hdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
		msg_set_nametype(hdr, type);
		msg_set_nameinst(hdr, inst);
		msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
963
		dport = tipc_nametbl_translate(net, type, inst, &dnode);
964 965
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dport);
966 967
		if (unlikely(!dport && !dnode))
			return -EHOSTUNREACH;
968

969 970
	} else if (dest->addrtype == TIPC_ADDR_ID) {
		dnode = dest->addr.id.node;
971 972 973 974 975
		msg_set_type(hdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(hdr, 0);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dest->addr.id.ref);
		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
976 977
	}

978 979 980 981 982 983
	/* Block or return if destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode));
	if (unlikely(rc))
		return rc;

	skb_queue_head_init(&pkts);
984
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
985 986
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
987
		return rc;
988

989 990 991 992 993 994
	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
		u32_push(clinks, dnode);
		tsk->cong_link_cnt++;
		rc = 0;
	}
995

996 997 998 999
	if (unlikely(syn && !rc))
		tipc_set_sk_state(sk, TIPC_CONNECTING);

	return rc ? rc : dlen;
1000 1001
}
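
/* Illustrative user space counterpart of the TIPC_ADDR_NAME branch above
 * (service type 18888 and instance 17 are arbitrary example values):
 *
 *	struct sockaddr_tipc dst = {
 *		.family = AF_TIPC,
 *		.addrtype = TIPC_ADDR_NAME,
 *		.addr.name.name.type = 18888,
 *		.addr.name.name.instance = 17,
 *		.addr.name.domain = 0,
 *	};
 *	sendto(sd, buf, len, 0, (struct sockaddr *)&dst, sizeof(dst));
 */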

/**
1003
 * tipc_sendstream - send stream-oriented data
1004
 * @sock: socket structure
1005 1006
 * @m: data to send
 * @dsz: total length of data to be transmitted
1007
 *
1008
 * Used for SOCK_STREAM data.
1009
 *
1010 1011
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
1012
 */
1013
static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
1014 1015 1016 1017 1018
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
1019
	ret = __tipc_sendstream(sock, m, dsz);
1020 1021 1022 1023 1024
	release_sock(sk);

	return ret;
}

1025
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
1026
{
1027
	struct sock *sk = sock->sk;
1028
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
1029 1030 1031 1032 1033 1034 1035 1036
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	struct sk_buff_head pkts;
	u32 dnode = tsk_peer_node(tsk);
	int send, sent = 0;
	int rc = 0;
1037

1038
	skb_queue_head_init(&pkts);
1039

1040 1041
	if (unlikely(dlen > INT_MAX))
		return -EMSGSIZE;
1042

1043 1044 1045 1046 1047
	/* Handle implicit connection setup */
	if (unlikely(dest)) {
		rc = __tipc_sendmsg(sock, m, dlen);
		if (dlen && (dlen == rc))
			tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
1048
		return rc;
1049
	}
1050

1051
	do {
1052 1053
		rc = tipc_wait_for_cond(sock, &timeout,
					(!tsk->cong_link_cnt &&
1054 1055
					 !tsk_conn_cong(tsk) &&
					 tipc_sk_connected(sk)));
1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073
		if (unlikely(rc))
			break;

		send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
		rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
		if (unlikely(rc != send))
			break;

		rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
		if (unlikely(rc == -ELINKCONG)) {
			tsk->cong_link_cnt = 1;
			rc = 0;
		}
		if (likely(!rc)) {
			tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
			sent += send;
		}
	} while (sent < dlen && !rc);
1074

1075
	return rc ? rc : sent;
1076 1077
}

1078
/**
1079
 * tipc_send_packet - send a connection-oriented message
1080
 * @sock: socket structure
1081 1082
 * @m: message to send
 * @dsz: length of data to be transmitted
1083
 *
1084
 * Used for SOCK_SEQPACKET messages.
1085
 *
1086
 * Returns the number of bytes sent on success, or errno otherwise
1087
 */
1088
static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
1089
{
1090 1091
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;
1092

1093
	return tipc_sendstream(sock, m, dsz);
1094 1095
}

1096
/* tipc_sk_finish_conn - complete the setup of a connection
1097
 */
1098
static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1099
				u32 peer_node)
1100
{
1101 1102
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
1103
	struct tipc_msg *msg = &tsk->phdr;
1104

1105 1106 1107 1108 1109
	msg_set_destnode(msg, peer_node);
	msg_set_destport(msg, peer_port);
	msg_set_type(msg, TIPC_CONN_MSG);
	msg_set_lookup_scope(msg, 0);
	msg_set_hdr_sz(msg, SHORT_H_SIZE);
1110

1111
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
1112
	tipc_set_sk_state(sk, TIPC_ESTABLISHED);
1113 1114
	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
1115
	tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
1116 1117 1118 1119 1120 1121
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
		return;

	/* Fall back to message based flow control */
	tsk->rcv_win = FLOWCTL_MSG_WIN;
	tsk->snd_win = FLOWCTL_MSG_WIN;
1122 1123 1124 1125 1126 1127
}

/**
 * set_orig_addr - capture sender's address for received message
 * @m: descriptor for message info
 * @msg: received message header
1128
 *
1129 1130
 * Note: Address is not captured if not requested by receiver.
 */
1131
static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
1132
{
1133
	DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);
1134

1135
	if (addr) {
1136 1137
		addr->family = AF_TIPC;
		addr->addrtype = TIPC_ADDR_ID;
1138
		memset(&addr->addr, 0, sizeof(addr->addr));
1139 1140
		addr->addr.id.ref = msg_origport(msg);
		addr->addr.id.node = msg_orignode(msg);
1141 1142
		addr->addr.name.domain = 0;	/* could leave uninitialized */
		addr->scope = 0;		/* could leave uninitialized */
1143 1144 1145 1146 1147
		m->msg_namelen = sizeof(struct sockaddr_tipc);
	}
}

/**
1148
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
1149 1150
 * @m: descriptor for message info
 * @msg: received message header
1151
 * @tsk: TIPC port associated with message
1152
 *
1153
 * Note: Ancillary data is not captured if not requested by receiver.
1154
 *
1155 1156
 * Returns 0 if successful, otherwise errno
 */
1157 1158
static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
				 struct tipc_sock *tsk)
1159 1160 1161 1162
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
1163
	int has_name;
P
Per Liden 已提交
1164 1165 1166 1167 1168 1169 1170 1171 1172 1173
	int res;

	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
1174 1175
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
1176
			return res;
1177 1178 1179 1180 1181 1182
		if (anc_data[1]) {
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
1183 1184 1185 1186 1187 1188
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
1189
		has_name = 1;
1190 1191 1192 1193 1194
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
1195
		has_name = 1;
P
Per Liden 已提交
1196 1197 1198 1199 1200
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
1201 1202 1203 1204
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
1205 1206
		break;
	default:
1207
		has_name = 0;
1208
	}
1209 1210 1211 1212 1213
	if (has_name) {
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}
1214 1215 1216 1217

	return 0;
}
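
/* A receiver that supplies msg_control can read this back with the standard
 * CMSG macros (sketch only; error handling and buffer sizing are simplified):
 *
 *	char ctrl[128];
 *	struct msghdr m = { .msg_control = ctrl,
 *			    .msg_controllen = sizeof(ctrl) };
 *	recvmsg(sd, &m, 0);
 *	for (struct cmsghdr *c = CMSG_FIRSTHDR(&m); c; c = CMSG_NXTHDR(&m, c))
 *		if (c->cmsg_level == SOL_TIPC && c->cmsg_type == TIPC_DESTNAME)
 *			break;
 *
 * TIPC_DESTNAME carries the 12 byte payload (type, lower, upper) put_cmsg()'d
 * above.
 */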

static void tipc_sk_send_ack(struct tipc_sock *tsk)
1219
{
1220 1221
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
1222
	struct sk_buff *skb = NULL;
1223
	struct tipc_msg *msg;
1224 1225
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);
1226

1227
	if (!tipc_sk_connected(sk))
1228
		return;
1229 1230 1231
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
1232
	if (!skb)
1233
		return;
1234
	msg = buf_msg(skb);
1235 1236 1237 1238 1239 1240 1241 1242
	msg_set_conn_ack(msg, tsk->rcv_unacked);
	tsk->rcv_unacked = 0;

	/* Adjust to and advertise the correct window limit */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
		tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
		msg_set_adv_win(msg, tsk->rcv_win);
	}
1243
	tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
1244 1245
}

1246
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
1247 1248 1249
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
1250
	long timeo = *timeop;
1251 1252 1253 1254
	int err;

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1255
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
1256
			if (sk->sk_shutdown & RCV_SHUTDOWN) {
1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269
				err = -ENOTCONN;
				break;
			}
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
1270 1271 1272
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
1273 1274
	}
	finish_wait(sk_sleep(sk), &wait);
1275
	*timeop = timeo;
1276 1277 1278
	return err;
}

1279
/**
1280
 * tipc_recvmsg - receive packet-oriented message
1281 1282 1283
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
1284
 *
1285 1286 1287 1288 1289
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
1290 1291
static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
			int flags)
1292
{
1293
	struct sock *sk = sock->sk;
1294
	struct tipc_sock *tsk = tipc_sk(sk);
1295 1296
	struct sk_buff *buf;
	struct tipc_msg *msg;
1297
	bool is_connectionless = tipc_sk_type_connectionless(sk);
1298
	long timeo;
1299 1300
	unsigned int sz;
	u32 err;
1301
	int res, hlen;
1302

1303
	/* Catch invalid receive requests */
1304 1305 1306
	if (unlikely(!buf_len))
		return -EINVAL;

1307
	lock_sock(sk);
1308

1309
	if (!is_connectionless && unlikely(sk->sk_state == TIPC_OPEN)) {
1310
		res = -ENOTCONN;
1311 1312 1313
		goto exit;
	}

1314
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1315
restart:
1316

1317
	/* Look for a message in receive queue; wait if necessary */
1318
	res = tipc_wait_for_rcvmsg(sock, &timeo);
1319 1320
	if (res)
		goto exit;
1321

1322 1323
	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
1324 1325
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
1326
	hlen = msg_hdr_sz(msg);
1327 1328 1329 1330
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
1331
		tsk_advance_rx_queue(sk);
1332 1333 1334 1335 1336 1337 1338
		goto restart;
	}

	/* Capture sender's address (optional) */
	set_orig_addr(m, msg);

	/* Capture ancillary data (optional) */
1339
	res = tipc_sk_anc_data_recv(m, msg, tsk);
1340
	if (res)
1341 1342 1343 1344 1345 1346 1347 1348
		goto exit;

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
		if (unlikely(buf_len < sz)) {
			sz = buf_len;
			m->msg_flags |= MSG_TRUNC;
		}
1349
		res = skb_copy_datagram_msg(buf, hlen, m, sz);
1350
		if (res)
1351 1352 1353
			goto exit;
		res = sz;
	} else {
1354 1355
		if (is_connectionless || err == TIPC_CONN_SHUTDOWN ||
		    m->msg_control)
1356 1357 1358 1359 1360
			res = 0;
		else
			res = -ECONNRESET;
	}

1361 1362 1363
	if (unlikely(flags & MSG_PEEK))
		goto exit;

1364
	if (likely(!is_connectionless)) {
1365 1366 1367
		tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
		if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
			tipc_sk_send_ack(tsk);
1368
	}
1369
	tsk_advance_rx_queue(sk);
1370
exit:
1371
	release_sock(sk);
1372 1373 1374
	return res;
}

1375
/**
1376
 * tipc_recv_stream - receive stream-oriented data
1377 1378 1379
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
1380 1381
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
1382 1383 1384 1385
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
1386 1387
static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
			    size_t buf_len, int flags)
1388
{
1389
	struct sock *sk = sock->sk;
1390
	struct tipc_sock *tsk = tipc_sk(sk);
1391 1392
	struct sk_buff *buf;
	struct tipc_msg *msg;
1393
	long timeo;
1394
	unsigned int sz;
1395
	int target;
1396 1397
	int sz_copied = 0;
	u32 err;
1398
	int res = 0, hlen;
1399

1400
	/* Catch invalid receive attempts */
1401 1402 1403
	if (unlikely(!buf_len))
		return -EINVAL;

1404
	lock_sock(sk);
1405

1406
	if (unlikely(sk->sk_state == TIPC_OPEN)) {
1407
		res = -ENOTCONN;
1408 1409 1410
		goto exit;
	}

1411
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
1412
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1413

1414
restart:
1415
	/* Look for a message in receive queue; wait if necessary */
1416
	res = tipc_wait_for_rcvmsg(sock, &timeo);
1417 1418
	if (res)
		goto exit;
1419

1420 1421
	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
1422 1423
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
1424
	hlen = msg_hdr_sz(msg);
1425 1426 1427 1428
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
1429
		tsk_advance_rx_queue(sk);
1430 1431 1432 1433 1434 1435
		goto restart;
	}

	/* Optionally capture sender's address & ancillary data of first msg */
	if (sz_copied == 0) {
		set_orig_addr(m, msg);
1436
		res = tipc_sk_anc_data_recv(m, msg, tsk);
1437
		if (res)
1438 1439 1440 1441 1442
			goto exit;
	}

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
1443 1444 1445
		u32 offset = TIPC_SKB_CB(buf)->bytes_read;
		u32 needed;
		int sz_to_copy;
1446

1447
		sz -= offset;
1448
		needed = (buf_len - sz_copied);
1449
		sz_to_copy = min(sz, needed);
1450

1451
		res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
1452
		if (res)
1453
			goto exit;
1454

1455 1456 1457 1458
		sz_copied += sz_to_copy;

		if (sz_to_copy < sz) {
			if (!(flags & MSG_PEEK))
1459 1460
				TIPC_SKB_CB(buf)->bytes_read =
					offset + sz_to_copy;
1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472
			goto exit;
		}
	} else {
		if (sz_copied != 0)
			goto exit; /* can't add error msg to valid data */

		if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
			res = 0;
		else
			res = -ECONNRESET;
	}

1473 1474 1475 1476 1477 1478 1479
	if (unlikely(flags & MSG_PEEK))
		goto exit;

	tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
	if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
		tipc_sk_send_ack(tsk);
	tsk_advance_rx_queue(sk);
1480 1481

	/* Loop around if more data is required */
1482 1483
	if ((sz_copied < buf_len) &&	/* didn't get all requested data */
	    (!skb_queue_empty(&sk->sk_receive_queue) ||
1484
	    (sz_copied < target)) &&	/* and more is ready or required */
1485
	    (!err))			/* and haven't reached a FIN */
1486 1487 1488
		goto restart;

exit:
1489
	release_sock(sk);
1490
	return sz_copied ? sz_copied : res;
1491 1492
}

1493 1494 1495 1496 1497 1498 1499 1500 1501 1502
/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
1503
	if (skwq_has_sleeper(wq))
1504 1505 1506 1507 1508 1509 1510 1511 1512 1513
		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 * @len: the length of messages
 */
1514
static void tipc_data_ready(struct sock *sk)
1515 1516 1517 1518 1519
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
1520
	if (skwq_has_sleeper(wq))
1521 1522 1523 1524 1525
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
						POLLRDNORM | POLLRDBAND);
	rcu_read_unlock();
}

1526 1527 1528 1529 1530
static void tipc_sock_destruct(struct sock *sk)
{
	__skb_queue_purge(&sk->sk_receive_queue);
}

1531 1532
/**
 * filter_connect - Handle all incoming messages for a connection-based socket
1533
 * @tsk: TIPC socket
1534
 * @skb: pointer to message buffer. Set to NULL if buffer is consumed
1535
 *
1536
 * Returns true if everything ok, false otherwise
1537
 */
1538
static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
1539
{
1540
	struct sock *sk = &tsk->sk;
1541
	struct net *net = sock_net(sk);
1542
	struct tipc_msg *hdr = buf_msg(skb);
1543

1544 1545
	if (unlikely(msg_mcast(hdr)))
		return false;
1546

1547 1548
	switch (sk->sk_state) {
	case TIPC_CONNECTING:
1549 1550 1551
		/* Accept only ACK or NACK message */
		if (unlikely(!msg_connected(hdr)))
			return false;
1552

1553
		if (unlikely(msg_errcode(hdr))) {
1554
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
1555
			sk->sk_err = ECONNREFUSED;
1556
			return true;
1557 1558
		}

1559
		if (unlikely(!msg_isdata(hdr))) {
1560
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
1561
			sk->sk_err = EINVAL;
1562
			return true;
1563 1564
		}

1565 1566
		tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
		msg_set_importance(&tsk->phdr, msg_importance(hdr));
1567

1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579
		/* If 'ACK+' message, add to socket receive queue */
		if (msg_data_sz(hdr))
			return true;

		/* If empty 'ACK-' message, wake up sleeping connect() */
		if (waitqueue_active(sk_sleep(sk)))
			wake_up_interruptible(sk_sleep(sk));

		/* 'ACK-' message is neither accepted nor rejected: */
		msg_set_dest_droppable(hdr, 1);
		return false;

1580
	case TIPC_OPEN:
1581
	case TIPC_DISCONNECTING:
1582 1583
		break;
	case TIPC_LISTEN:
1584
		/* Accept only SYN message */
1585 1586
		if (!msg_connected(hdr) && !(msg_errcode(hdr)))
			return true;
1587
		break;
1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600
	case TIPC_ESTABLISHED:
		/* Accept only connection-based messages sent by peer */
		if (unlikely(!tsk_peer_msg(tsk, hdr)))
			return false;

		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			/* Let timer expire on its own */
			tipc_node_remove_conn(net, tsk_peer_node(tsk),
					      tsk->portid);
			sk->sk_state_change(sk);
		}
		return true;
1601
	default:
1602
		pr_err("Unknown sk_state %u\n", sk->sk_state);
1603
	}
1604

1605
	return false;
1606 1607
}

1608 1609 1610
/**
 * rcvbuf_limit - get proper overload limit of socket receive queue
 * @sk: socket
1611
 * @skb: message
1612
 *
1613 1614
 * For connection oriented messages, irrespective of importance,
 * default queue limit is 2 MB.
1615
 *
1616 1617
 * For connectionless messages, queue limits are based on message
 * importance as follows:
1618
 *
1619 1620 1621 1622
 * TIPC_LOW_IMPORTANCE       (2 MB)
 * TIPC_MEDIUM_IMPORTANCE    (4 MB)
 * TIPC_HIGH_IMPORTANCE      (8 MB)
 * TIPC_CRITICAL_IMPORTANCE  (16 MB)
1623 1624 1625
 *
 * Returns overload limit according to corresponding message importance
 */
1626
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1627
{
1628 1629 1630 1631 1632
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);

	if (unlikely(!msg_connected(hdr)))
		return sk->sk_rcvbuf << msg_importance(hdr);
1633

1634 1635
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return sk->sk_rcvbuf;
1636

1637
	return FLOWCTL_MSG_LIM;
1638 1639
}
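
/* Example (illustrative): with the default 2 MB sk_rcvbuf, a connectionless
 * message of TIPC_HIGH_IMPORTANCE (importance value 2) may queue up to
 * 2 MB << 2 = 8 MB on the receiving socket before it is considered
 * overloaded, which is where the table above comes from.
 */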

/**
1641 1642
 * filter_rcv - validate incoming message
 * @sk: socket
1643
 * @skb: pointer to message.
1644
 *
1645 1646 1647
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
1648
 * Called with socket lock already taken
1649
 *
1650
 * Returns true if message was added to socket receive queue, otherwise false
P
 */
J
static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
		       struct sk_buff_head *xmitq)
P
{
1655
	struct tipc_sock *tsk = tipc_sk(sk);
1656 1657 1658 1659
	struct tipc_msg *hdr = buf_msg(skb);
	unsigned int limit = rcvbuf_limit(sk, skb);
	int err = TIPC_OK;
	int usr = msg_user(hdr);
1660
	u32 onode;
1661

1662
	if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
1663
		tipc_sk_proto_rcv(tsk, skb, xmitq);
1664
		return false;
1665
	}
1666

1667
	if (unlikely(usr == SOCK_WAKEUP)) {
1668
		onode = msg_orignode(hdr);
1669
		kfree_skb(skb);
1670 1671
		u32_del(&tsk->cong_links, onode);
		tsk->cong_link_cnt--;
1672
		sk->sk_write_space(sk);
1673
		return false;
1674 1675
	}

1676 1677 1678 1679 1680
	/* Drop if illegal message type */
	if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) {
		kfree_skb(skb);
		return false;
	}
1681

1682
	/* Reject if wrong message type for current socket state */
1683
	if (tipc_sk_type_connectionless(sk)) {
1684 1685 1686 1687 1688 1689 1690
		if (msg_connected(hdr)) {
			err = TIPC_ERR_NO_PORT;
			goto reject;
		}
	} else if (unlikely(!filter_connect(tsk, skb))) {
		err = TIPC_ERR_NO_PORT;
		goto reject;
P
Per Liden 已提交
1691 1692 1693
	}

	/* Reject message if there isn't room to queue it */
1694 1695 1696 1697
	if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) {
		err = TIPC_ERR_OVERLOAD;
		goto reject;
	}
P
Per Liden 已提交
1698

1699
	/* Enqueue message */
1700
	TIPC_SKB_CB(skb)->bytes_read = 0;
1701 1702
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	skb_set_owner_r(skb, sk);
1703

1704
	sk->sk_data_ready(sk);
1705 1706 1707
	return true;

reject:
1708 1709
	if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
		__skb_queue_tail(xmitq, skb);
1710
	return false;
1711
}
1712

/**
 * tipc_backlog_rcv - handle incoming message from backlog queue
 * @sk: socket
 * @skb: message
 *
 * Caller must hold socket lock
 *
 * Returns 0
 */
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	unsigned int truesize = skb->truesize;
	struct sk_buff_head xmitq;
	u32 dnode, selector;

	__skb_queue_head_init(&xmitq);

	if (likely(filter_rcv(sk, skb, &xmitq))) {
		atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
		return 0;
	}

	if (skb_queue_empty(&xmitq))
		return 0;

	/* Send response/rejected message */
	skb = __skb_dequeue(&xmitq);
	dnode = msg_destnode(buf_msg(skb));
	selector = msg_origport(buf_msg(skb));
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
	return 0;
}

/**
 * tipc_sk_enqueue - extract all buffers with destination 'dport' from
 *                   inputq and try adding them to socket or backlog queue
 * @inputq: list of incoming buffers with potentially different destinations
 * @sk: socket where the buffers should be enqueued
 * @dport: port number for the socket
 * @xmitq: output queue for messages to be sent back to the originators
 *
 * Caller must hold socket lock
 */
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
			    u32 dport, struct sk_buff_head *xmitq)
{
	unsigned long time_limit = jiffies + 2;
	struct sk_buff *skb;
	unsigned int lim;
	atomic_t *dcnt;
	u32 onode;

	while (skb_queue_len(inputq)) {
		if (unlikely(time_after_eq(jiffies, time_limit)))
			return;

		skb = tipc_skb_dequeue(inputq, dport);
		if (unlikely(!skb))
			return;

		/* Add message directly to receive queue if possible */
		if (!sock_owned_by_user(sk)) {
			filter_rcv(sk, skb, xmitq);
			continue;
		}

		/* Try backlog, compensating for double-counted bytes */
		dcnt = &tipc_sk(sk)->dupl_rcvcnt;
		if (!sk->sk_backlog.len)
			atomic_set(dcnt, 0);
		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
		if (likely(!sk_add_backlog(sk, skb, lim)))
			continue;

		/* Overload => reject message back to sender */
		onode = tipc_own_addr(sock_net(sk));
		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
			__skb_queue_tail(xmitq, skb);
		break;
	}
}
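
/* Note: the jiffies + 2 bound in tipc_sk_enqueue() caps how long one pass may
 * keep the socket lock; buffers still left in inputq are picked up again by
 * the caller's loop in tipc_sk_rcv().
 */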

/**
 * tipc_sk_rcv - handle a chain of incoming buffers
 * @inputq: buffer list containing the buffers
 * Consumes all buffers in list until inputq is empty
 * Note: may be called in multiple threads referring to the same queue
 */
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
{
	struct sk_buff_head xmitq;
	u32 dnode, dport = 0;
	int err;
	struct tipc_sock *tsk;
	struct sock *sk;
	struct sk_buff *skb;

	__skb_queue_head_init(&xmitq);
	while (skb_queue_len(inputq)) {
		dport = tipc_skb_peek_port(inputq, dport);
		tsk = tipc_sk_lookup(net, dport);

		if (likely(tsk)) {
			sk = &tsk->sk;
			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
				spin_unlock_bh(&sk->sk_lock.slock);
			}
			/* Send pending response/rejected messages, if any */
			while ((skb = __skb_dequeue(&xmitq))) {
				dnode = msg_destnode(buf_msg(skb));
				tipc_node_xmit_skb(net, skb, dnode, dport);
			}
			sock_put(sk);
			continue;
		}

		/* No destination socket => dequeue skb if still there */
		skb = tipc_skb_dequeue(inputq, dport);
		if (!skb)
			return;

		/* Try secondary lookup if unresolved named message */
		err = TIPC_ERR_NO_PORT;
		if (tipc_msg_lookup_dest(net, skb, &err))
			goto xmit;

		/* Prepare for message rejection */
		if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
			continue;
xmit:
		dnode = msg_destnode(buf_msg(skb));
		tipc_node_xmit_skb(net, skb, dnode, dport);
	}
}

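/* tipc_wait_for_connect - block until the socket leaves TIPC_CONNECTING,
 * a socket error is reported, the timeout expires or a signal is pending.
 */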
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = sock->sk;
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (!*timeo_p)
			return -ETIMEDOUT;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		add_wait_queue(sk_sleep(sk), &wait);
		done = sk_wait_event(sk, timeo_p,
				     sk->sk_state != TIPC_CONNECTING, &wait);
		remove_wait_queue(sk_sleep(sk), &wait);
	} while (!done);
	return 0;
}

/**
 * tipc_connect - establish a connection to another TIPC port
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
 * @flags: file-related flags associated with socket
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
			int destlen, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
	long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
	int previous;
	int res = 0;

	lock_sock(sk);

	/* DGRAM/RDM connect(), just save the destaddr */
	if (tipc_sk_type_connectionless(sk)) {
		if (dst->family == AF_UNSPEC) {
			memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
		} else if (destlen != sizeof(struct sockaddr_tipc)) {
			res = -EINVAL;
		} else {
			memcpy(&tsk->peer, dest, destlen);
		}
		goto exit;
	}

	/*
	 * Reject connection attempt using multicast address
	 *
	 * Note: send_msg() validates the rest of the address fields,
	 *       so there's no need to do it here
	 */
	if (dst->addrtype == TIPC_ADDR_MCAST) {
		res = -EINVAL;
		goto exit;
	}

	previous = sk->sk_state;

	switch (sk->sk_state) {
	case TIPC_OPEN:
		/* Send a 'SYN-' to destination */
		m.msg_name = dest;
		m.msg_namelen = destlen;

		/* If connect is in non-blocking case, set MSG_DONTWAIT to
		 * indicate send_msg() is never blocked.
		 */
		if (!timeout)
			m.msg_flags = MSG_DONTWAIT;

		res = __tipc_sendmsg(sock, &m, 0);
		if ((res < 0) && (res != -EWOULDBLOCK))
			goto exit;

		/* Just entered TIPC_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		res = -EINPROGRESS;
		/* fall thru' */
	case TIPC_CONNECTING:
		if (!timeout) {
			if (previous == TIPC_CONNECTING)
				res = -EALREADY;
			goto exit;
		}
		timeout = msecs_to_jiffies(timeout);
		/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
		res = tipc_wait_for_connect(sock, &timeout);
		break;
	case TIPC_ESTABLISHED:
		res = -EISCONN;
		break;
	default:
		res = -EINVAL;
	}

exit:
	release_sock(sk);
	return res;
}

/**
 * tipc_listen - allow socket to listen for incoming connections
 * @sock: socket structure
 * @len: (unused)
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_listen(struct socket *sock, int len)
{
	struct sock *sk = sock->sk;
	int res;

	lock_sock(sk);
	res = tipc_set_sk_state(sk, TIPC_LISTEN);
	release_sock(sk);

	return res;
}

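/* tipc_wait_for_accept - block until a connection request is queued on the
 * listening socket's receive queue, the timeout expires or a signal arrives.
 */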
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int err;

	/* True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	*/
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

/**
 * tipc_accept - wait for connection request
 * @sock: listening socket
 * @newsock: new socket that is to be connected
 * @flags: file-related flags associated with socket
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
{
	struct sock *new_sk, *sk = sock->sk;
	struct sk_buff *buf;
	struct tipc_sock *new_tsock;
	struct tipc_msg *msg;
	long timeo;
	int res;

	lock_sock(sk);

	if (sk->sk_state != TIPC_LISTEN) {
		res = -EINVAL;
		goto exit;
	}
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	res = tipc_wait_for_accept(sock, timeo);
	if (res)
		goto exit;

	buf = skb_peek(&sk->sk_receive_queue);

	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
	if (res)
		goto exit;
	security_sk_clone(sock->sk, new_sock->sk);

	new_sk = new_sock->sk;
	new_tsock = tipc_sk(new_sk);
	msg = buf_msg(buf);

	/* we lock on new_sk; but lockdep sees the lock on sk */
	lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);

	/*
	 * Reject any stray messages received by new socket
	 * before the socket lock was taken (very, very unlikely)
	 */
	tsk_rej_rx_queue(new_sk);

	/* Connect new socket to its peer */
	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));

	tsk_set_importance(new_tsock, msg_importance(msg));
	if (msg_named(msg)) {
		new_tsock->conn_type = msg_nametype(msg);
		new_tsock->conn_instance = msg_nameinst(msg);
	}

	/*
	 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
	 * Respond to 'SYN+' by queuing it on new socket.
	 */
	if (!msg_data_sz(msg)) {
		struct msghdr m = {NULL,};

		tsk_advance_rx_queue(sk);
		__tipc_sendstream(new_sock, &m, 0);
	} else {
		__skb_dequeue(&sk->sk_receive_queue);
		__skb_queue_head(&new_sk->sk_receive_queue, buf);
		skb_set_owner_r(buf, new_sk);
	}
	release_sock(new_sk);
exit:
	release_sock(sk);
	return res;
}

/**
 * tipc_shutdown - shutdown socket connection
 * @sock: socket structure
 * @how: direction to close (must be SHUT_RDWR)
 *
 * Terminates connection (if necessary), then purges socket's receive queue.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	int res;

	if (how != SHUT_RDWR)
		return -EINVAL;

	lock_sock(sk);

	__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
	sk->sk_shutdown = SEND_SHUTDOWN;

	if (sk->sk_state == TIPC_DISCONNECTING) {
		/* Discard any unreceived messages */
		__skb_queue_purge(&sk->sk_receive_queue);

		/* Wake up anyone sleeping in poll */
		sk->sk_state_change(sk);
		res = 0;
	} else {
		res = -ENOTCONN;
	}

	release_sock(sk);
	return res;
}

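/* tipc_sk_timeout - connection supervision timer. If the previous probe is
 * still unacknowledged the connection is torn down; otherwise a new
 * CONN_PROBE is sent to the peer and the timer is rearmed.
 */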
static void tipc_sk_timeout(unsigned long data)
{
	struct tipc_sock *tsk = (struct tipc_sock *)data;
	struct sock *sk = &tsk->sk;
	struct sk_buff *skb = NULL;
	u32 peer_port, peer_node;
	u32 own_node = tsk_own_node(tsk);

	bh_lock_sock(sk);
	if (!tipc_sk_connected(sk)) {
		bh_unlock_sock(sk);
		goto exit;
	}
	peer_port = tsk_peer_port(tsk);
	peer_node = tsk_peer_node(tsk);

	if (tsk->probe_unacked) {
		if (!sock_owned_by_user(sk)) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
					      tsk_peer_port(tsk));
			sk->sk_state_change(sk);
		} else {
			/* Try again later */
			sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
		}

		bh_unlock_sock(sk);
		goto exit;
	}

	skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
			      INT_H_SIZE, 0, peer_node, own_node,
			      peer_port, tsk->portid, TIPC_OK);
	tsk->probe_unacked = true;
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
	bh_unlock_sock(sk);
	if (skb)
		tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
exit:
	sock_put(sk);
}

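/* tipc_sk_publish - bind a name sequence to the socket by adding an entry
 * to the name table; not allowed on a connected socket.
 */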
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq const *seq)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct publication *publ;
	u32 key;

	if (tipc_sk_connected(sk))
		return -EINVAL;
	key = tsk->portid + tsk->pub_count + 1;
	if (key == tsk->portid)
		return -EADDRINUSE;

	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
				    scope, tsk->portid, key);
	if (unlikely(!publ))
		return -EINVAL;

	list_add(&publ->pport_list, &tsk->publications);
	tsk->pub_count++;
	tsk->published = 1;
	return 0;
}

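/* tipc_sk_withdraw - withdraw a specific name sequence (if 'seq' is given)
 * or all publications bound to the socket (if 'seq' is NULL).
 */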
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq const *seq)
{
	struct net *net = sock_net(&tsk->sk);
	struct publication *publ;
	struct publication *safe;
	int rc = -EINVAL;

	list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
		if (seq) {
			if (publ->scope != scope)
				continue;
			if (publ->type != seq->type)
				continue;
			if (publ->lower != seq->lower)
				continue;
			if (publ->upper != seq->upper)
				break;
			tipc_nametbl_withdraw(net, publ->type, publ->lower,
					      publ->ref, publ->key);
			rc = 0;
			break;
		}
		tipc_nametbl_withdraw(net, publ->type, publ->lower,
				      publ->ref, publ->key);
		rc = 0;
	}
	if (list_empty(&tsk->publications))
		tsk->published = 0;
	return rc;
}

/* tipc_sk_reinit: set non-zero address in all existing sockets
 *                 when we go from standalone to network mode.
 */
void tipc_sk_reinit(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	const struct bucket_table *tbl;
	struct rhash_head *pos;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;
	int i;

	rcu_read_lock();
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
	for (i = 0; i < tbl->size; i++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, i, node) {
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			msg = &tsk->phdr;
			msg_set_prevnode(msg, tn->own_addr);
			msg_set_orignode(msg, tn->own_addr);
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
	}
	rcu_read_unlock();
}

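/* tipc_sk_lookup - find a socket by port id in the socket hash table and
 * take a reference on it; the caller must release it with sock_put().
 */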
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_sock *tsk;

	rcu_read_lock();
	tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params);
	if (tsk)
		sock_hold(&tsk->sk);
	rcu_read_unlock();

	return tsk;
}

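/* tipc_sk_insert - allocate a random, unused port id for the socket and
 * insert it into the socket hash table; returns 0 on success, -1 if no
 * free port id could be found.
 */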
static int tipc_sk_insert(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
	u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;

	while (remaining--) {
		portid++;
		if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT))
			portid = TIPC_MIN_PORT;
		tsk->portid = portid;
		sock_hold(&tsk->sk);
		if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
						   tsk_rht_params))
			return 0;
		sock_put(&tsk->sk);
	}

	return -1;
}

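/* tipc_sk_remove - remove the socket from the socket hash table and drop
 * the reference held by the table.
 */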
static void tipc_sk_remove(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);

	if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
}

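/* Socket hash table parameters: sockets are keyed by their 32-bit port id
 * (tsk->portid) and linked through tsk->node.
 */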
static const struct rhashtable_params tsk_rht_params = {
	.nelem_hint = 192,
	.head_offset = offsetof(struct tipc_sock, node),
	.key_offset = offsetof(struct tipc_sock, portid),
	.key_len = sizeof(u32), /* portid */
	.max_size = 1048576,
	.min_size = 256,
	.automatic_shrinking = true,
};

int tipc_sk_rht_init(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
}

void tipc_sk_rht_destroy(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	/* Wait for socket readers to complete */
	synchronize_net();

	rhashtable_destroy(&tn->sk_rht);
}

/**
 * tipc_setsockopt - set socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: pointer to new option value
 * @ol: length of option value
 *
 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, unsigned int ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	u32 value;
	int res;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return 0;
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
	if (ol < sizeof(value))
		return -EINVAL;
	res = get_user(value, (u32 __user *)ov);
	if (res)
		return res;

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		res = tsk_set_importance(tsk, value);
		break;
	case TIPC_SRC_DROPPABLE:
		if (sock->type != SOCK_STREAM)
			tsk_set_unreliable(tsk, value);
		else
			res = -ENOPROTOOPT;
		break;
	case TIPC_DEST_DROPPABLE:
		tsk_set_unreturnable(tsk, value);
		break;
	case TIPC_CONN_TIMEOUT:
		tipc_sk(sk)->conn_timeout = value;
		/* no need to set "res", since already 0 at this point */
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	return res;
}

/**
 * tipc_getsockopt - get socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: receptacle for option value
 * @ol: receptacle for length of option value
 *
 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, int __user *ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	int len;
	u32 value;
	int res;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return put_user(0, ol);
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
	res = get_user(len, ol);
	if (res)
		return res;

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		value = tsk_importance(tsk);
		break;
	case TIPC_SRC_DROPPABLE:
		value = tsk_unreliable(tsk);
		break;
	case TIPC_DEST_DROPPABLE:
		value = tsk_unreturnable(tsk);
		break;
	case TIPC_CONN_TIMEOUT:
		value = tsk->conn_timeout;
		/* no need to set "res", since already 0 at this point */
		break;
	case TIPC_NODE_RECVQ_DEPTH:
		value = 0; /* was tipc_queue_size, now obsolete */
		break;
	case TIPC_SOCK_RECVQ_DEPTH:
		value = skb_queue_len(&sk->sk_receive_queue);
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	if (res)
		return res;	/* "get" failed */

	if (len < sizeof(value))
		return -EINVAL;

	if (copy_to_user(ov, &value, sizeof(value)))
		return -EFAULT;

	return put_user(sizeof(value), ol);
}

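/* tipc_ioctl - handle TIPC-specific ioctls; currently only SIOCGETLINKNAME,
 * which resolves a bearer id and peer address to a link name.
 */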
static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	struct tipc_sioc_ln_req lnr;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case SIOCGETLINKNAME:
		if (copy_from_user(&lnr, argp, sizeof(lnr)))
			return -EFAULT;
		if (!tipc_node_get_linkname(sock_net(sk),
					    lnr.bearer_id & 0xffff, lnr.peer,
					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
			if (copy_to_user(argp, &lnr, sizeof(lnr)))
				return -EFAULT;
			return 0;
		}
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

/* Protocol switches for the various types of TIPC sockets */

static const struct proto_ops msg_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendmsg,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

static const struct proto_ops packet_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_packet,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

static const struct proto_ops stream_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendstream,
	.recvmsg	= tipc_recv_stream,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

static const struct net_proto_family tipc_family_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.create		= tipc_sk_create
};

static struct proto tipc_proto = {
	.name		= "TIPC",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct tipc_sock),
	.sysctl_rmem	= sysctl_tipc_rmem
};

/**
 * tipc_socket_init - initialize TIPC socket interface
 *
 * Returns 0 on success, errno otherwise
 */
int tipc_socket_init(void)
{
	int res;

	res = proto_register(&tipc_proto, 1);
	if (res) {
		pr_err("Failed to register TIPC protocol type\n");
		goto out;
	}

	res = sock_register(&tipc_family_ops);
	if (res) {
		pr_err("Failed to register TIPC socket type\n");
		proto_unregister(&tipc_proto);
		goto out;
	}
 out:
	return res;
}

/**
 * tipc_socket_stop - stop TIPC socket interface
 */
void tipc_socket_stop(void)
{
	sock_unregister(tipc_family_ops.family);
	proto_unregister(&tipc_proto);
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
{
	u32 peer_node;
	u32 peer_port;
	struct nlattr *nest;

	peer_node = tsk_peer_node(tsk);
	peer_port = tsk_peer_port(tsk);

	nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);

	if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
		goto msg_full;
	if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
		goto msg_full;

	if (tsk->conn_type != 0) {
		if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
			goto msg_full;
	}
	nla_nest_end(skb, nest);

	return 0;

msg_full:
	nla_nest_cancel(skb, nest);

	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tipc_sock *tsk)
{
	int err;
	void *hdr;
	struct nlattr *attrs;
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct sock *sk = &tsk->sk;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto genlmsg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
		goto attr_msg_cancel;

	if (tipc_sk_connected(sk)) {
		err = __tipc_nl_add_sk_con(skb, tsk);
		if (err)
			goto attr_msg_cancel;
	} else if (!list_empty(&tsk->publications)) {
		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
			goto attr_msg_cancel;
	}
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

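/* tipc_nl_sk_dump - netlink dump of all sockets; resumable, with the current
 * hash table bucket and the port id at which the previous pass stopped kept
 * in cb->args[0] and cb->args[1].
 */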
int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	struct tipc_sock *tsk;
	const struct bucket_table *tbl;
	struct rhash_head *pos;
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 tbl_id = cb->args[0];
	u32 prev_portid = cb->args[1];

	rcu_read_lock();
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
	for (; tbl_id < tbl->size; tbl_id++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) {
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			if (prev_portid && prev_portid != tsk->portid) {
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				continue;
			}

			err = __tipc_nl_add_sk(skb, cb, tsk);
			if (err) {
				prev_portid = tsk->portid;
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				goto out;
			}
			prev_portid = 0;
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
	}
out:
	rcu_read_unlock();
	cb->args[0] = tbl_id;
	cb->args[1] = prev_portid;

	return skb->len;
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 struct publication *publ)
{
	void *hdr;
	struct nlattr *attrs;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET);
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_PUBL);
	if (!attrs)
		goto genlmsg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
				  struct netlink_callback *cb,
				  struct tipc_sock *tsk, u32 *last_publ)
{
	int err;
	struct publication *p;

	if (*last_publ) {
		list_for_each_entry(p, &tsk->publications, pport_list) {
			if (p->key == *last_publ)
				break;
		}
		if (p->key != *last_publ) {
			/* We never set seq or call nl_dump_check_consistent()
			 * this means that setting prev_seq here will cause the
			 * consistence check to fail in the netlink callback
			 * handler. Resulting in the last NLMSG_DONE message
			 * having the NLM_F_DUMP_INTR flag set.
			 */
			cb->prev_seq = 1;
			*last_publ = 0;
			return -EPIPE;
		}
	} else {
		p = list_first_entry(&tsk->publications, struct publication,
				     pport_list);
	}

	list_for_each_entry_from(p, &tsk->publications, pport_list) {
		err = __tipc_nl_add_sk_publ(skb, cb, p);
		if (err) {
			*last_publ = p->key;
			return err;
		}
	}
	*last_publ = 0;

	return 0;
}

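/* tipc_nl_publ_dump - netlink dump of one socket's publications; resumes
 * from cb->args[]: [0] = port id, [1] = key of last dumped publication,
 * [2] = done flag.
 */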
int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	u32 tsk_portid = cb->args[0];
	u32 last_publ = cb->args[1];
	u32 done = cb->args[2];
	struct net *net = sock_net(skb->sk);
	struct tipc_sock *tsk;

	if (!tsk_portid) {
		struct nlattr **attrs;
		struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];

		err = tipc_nlmsg_parse(cb->nlh, &attrs);
		if (err)
			return err;

		if (!attrs[TIPC_NLA_SOCK])
			return -EINVAL;

		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
				       attrs[TIPC_NLA_SOCK],
				       tipc_nl_sock_policy);
		if (err)
			return err;

		if (!sock[TIPC_NLA_SOCK_REF])
			return -EINVAL;

		tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]);
	}

	if (done)
		return 0;

	tsk = tipc_sk_lookup(net, tsk_portid);
	if (!tsk)
		return -EINVAL;

	lock_sock(&tsk->sk);
	err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ);
	if (!err)
		done = 1;
	release_sock(&tsk->sk);
	sock_put(&tsk->sk);

	cb->args[0] = tsk_portid;
	cb->args[1] = last_publ;
	cb->args[2] = done;

	return skb->len;
}