socket.c 71.4 KB
Newer Older
P
Per Liden 已提交
1
/*
2
 * net/tipc/socket.c: TIPC socket API
3
 *
4
 * Copyright (c) 2001-2007, 2012-2016, Ericsson AB
5
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
P
Per Liden 已提交
6 7
 * All rights reserved.
 *
P
Per Liden 已提交
8
 * Redistribution and use in source and binary forms, with or without
P
Per Liden 已提交
9 10
 * modification, are permitted provided that the following conditions are met:
 *
P
Per Liden 已提交
11 12 13 14 15 16 17 18
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
P
Per Liden 已提交
19
 *
P
Per Liden 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
P
Per Liden 已提交
34 35 36
 * POSSIBILITY OF SUCH DAMAGE.
 */

37
#include <linux/rhashtable.h>
P
Per Liden 已提交
38
#include "core.h"
39
#include "name_table.h"
E
Erik Hugne 已提交
40
#include "node.h"
41
#include "link.h"
42
#include "name_distr.h"
43
#include "socket.h"
44
#include "bcast.h"
45
#include "netlink.h"
46

47
#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
48
#define CONN_PROBING_INTERVAL	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
49 50 51
#define TIPC_FWD_MSG		1
#define TIPC_MAX_PORT		0xffffffff
#define TIPC_MIN_PORT		1
52

53 54
/* Socket states, stored in sk->sk_state.  The values alias the standard
 * TCP socket states — presumably so that generic socket infrastructure
 * reporting sk_state sees meaningful values; TODO confirm against
 * sock_diag/netlink users.
 */
enum {
	TIPC_LISTEN = TCP_LISTEN,
	TIPC_ESTABLISHED = TCP_ESTABLISHED,
	TIPC_OPEN = TCP_CLOSE,
	TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
	TIPC_CONNECTING = TCP_SYN_SENT,
};

61 62 63 64 65 66 67
/**
 * struct tipc_sock - TIPC socket structure
 * @sk: socket - interacts with 'port' and with user via the socket API
 * @conn_type: TIPC type used when connection was established
 * @conn_instance: TIPC instance used when connection was established
 * @published: non-zero if port has one or more associated names
 * @max_pkt: maximum packet size "hint" used when building messages sent by port
 * @portid: unique port identity in TIPC socket hash table
 * @phdr: preformatted message header used when sending messages
 * @sock_list: list linkage (NOTE(review): not used in the visible part of
 *             this file — confirm its purpose against the rest of the file)
 * @publications: list of publications for port
 * @pub_count: total # of publications port has made during its lifetime
 * @conn_timeout: the time we can wait for an unresponded setup request
 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
 * @probe_unacked: cleared whenever a valid message from the connected peer
 *                 arrives (see tipc_sk_proto_rcv())
 * @link_cong: non-zero if owner must sleep because of link congestion
 * @snt_unacked: # of sent units not yet acked by peer (see tsk_inc())
 * @snd_win: send window; updated from peer CONN_ACK when peer supports
 *           TIPC_BLOCK_FLOWCTL
 * @peer_caps: capability bits of the connected peer (e.g. TIPC_BLOCK_FLOWCTL)
 * @rcv_unacked: # messages read by user, but not yet acked back to peer
 * @rcv_win: our advertised receive window
 * @peer: 'connected' peer for dgram/rdm
 * @node: hash table node
 * @rcu: rcu struct for tipc_sock
 */
struct tipc_sock {
	struct sock sk;
	u32 conn_type;
	u32 conn_instance;
	int published;
	u32 max_pkt;
	u32 portid;
	struct tipc_msg phdr;
	struct list_head sock_list;
	struct list_head publications;
	u32 pub_count;
	uint conn_timeout;
	atomic_t dupl_rcvcnt;
	bool probe_unacked;
	bool link_cong;
	u16 snt_unacked;
	u16 snd_win;
	u16 peer_caps;
	u16 rcv_unacked;
	u16 rcv_win;
	struct sockaddr_tipc peer;
	struct rhash_head node;
	struct rcu_head rcu;
};
P
Per Liden 已提交
106

107
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
108
static void tipc_data_ready(struct sock *sk);
109
static void tipc_write_space(struct sock *sk);
110
static void tipc_sock_destruct(struct sock *sk);
111 112
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
113
static void tipc_sk_timeout(unsigned long data);
114
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
115
			   struct tipc_name_seq const *seq);
116
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
117
			    struct tipc_name_seq const *seq);
118
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
119 120
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
121 122 123
static int __tipc_send_stream(struct socket *sock, struct msghdr *m,
			      size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
P
Per Liden 已提交
124

125 126 127
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
P
Per Liden 已提交
128
static struct proto tipc_proto;
129 130
static const struct rhashtable_params tsk_rht_params;

131 132 133 134 135
/* tsk_own_node - return own node address, as cached in the socket's
 * preformatted message header
 */
static u32 tsk_own_node(struct tipc_sock *tsk)
{
	return msg_prevnode(&tsk->phdr);
}

136
/* tsk_peer_node - return node address of the connected peer, as cached in
 * the socket's preformatted message header
 */
static u32 tsk_peer_node(struct tipc_sock *tsk)
{
	return msg_destnode(&tsk->phdr);
}

141
/* tsk_peer_port - return port number of the connected peer, as cached in
 * the socket's preformatted message header
 */
static u32 tsk_peer_port(struct tipc_sock *tsk)
{
	return msg_destport(&tsk->phdr);
}

146
/* tsk_unreliable - true if messages from this socket may be dropped
 * at the source on congestion ("source droppable" header bit set)
 */
static  bool tsk_unreliable(struct tipc_sock *tsk)
{
	return msg_src_droppable(&tsk->phdr) != 0;
}

151
/* tsk_set_unreliable - set/clear the "source droppable" bit in the
 * socket's preformatted header (affects all subsequently built messages)
 */
static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
{
	msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0);
}

156
/* tsk_unreturnable - true if undeliverable messages are discarded rather
 * than returned to the sender ("destination droppable" header bit set)
 */
static bool tsk_unreturnable(struct tipc_sock *tsk)
{
	return msg_dest_droppable(&tsk->phdr) != 0;
}

161
/* tsk_set_unreturnable - set/clear the "destination droppable" bit in the
 * socket's preformatted header (affects all subsequently built messages)
 */
static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
{
	msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0);
}

166
/* tsk_importance - return the message importance level currently set in
 * the socket's preformatted header
 */
static int tsk_importance(struct tipc_sock *tsk)
{
	return msg_importance(&tsk->phdr);
}

171
/* tsk_set_importance - set message importance level in the socket's
 * preformatted header
 *
 * Returns 0 on success, -EINVAL if @imp exceeds TIPC_CRITICAL_IMPORTANCE
 */
static int tsk_set_importance(struct tipc_sock *tsk, int imp)
{
	if (imp > TIPC_CRITICAL_IMPORTANCE)
		return -EINVAL;
	msg_set_importance(&tsk->phdr, (u32)imp);
	return 0;
}
178

179 180 181 182 183
/* tipc_sk - downcast a generic sock to its enclosing tipc_sock
 * (valid because struct tipc_sock embeds struct sock as its first member)
 */
static struct tipc_sock *tipc_sk(const struct sock *sk)
{
	return container_of(sk, struct tipc_sock, sk);
}

184
/* tsk_conn_cong - true if the connection is congested, i.e. the amount of
 * unacked sent data exceeds the peer-advertised send window
 */
static bool tsk_conn_cong(struct tipc_sock *tsk)
{
	return tsk->snt_unacked > tsk->snd_win;
}

/* tsk_adv_blocks(): translate a buffer size in bytes to number of
 * advertisable blocks, taking into account the ratio truesize(len)/len
 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
 */
static u16 tsk_adv_blocks(int len)
{
	return len / FLOWCTL_BLK_SZ / 4;
}

/* tsk_inc(): increment counter for sent or received data
 * - If block based flow control is not supported by peer we
 *   fall back to message based ditto, incrementing the counter
 *   by exactly one unit per message regardless of its length
 */
static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
{
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return ((msglen / FLOWCTL_BLK_SZ) + 1);
	return 1;
}

209
/**
 * tsk_advance_rx_queue - discard first buffer in socket receive queue
 *
 * Caller must hold socket lock
 *
 * Safe on an empty queue: __skb_dequeue() then returns NULL and
 * kfree_skb(NULL) is a no-op.
 */
static void tsk_advance_rx_queue(struct sock *sk)
{
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
}

219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
/* tipc_sk_respond() : send response message back to sender
 *
 * The skb is reversed in place with error code @err; if reversal fails
 * (e.g. message is not reversible) tipc_msg_reverse() consumes the buffer
 * and nothing is sent.
 */
static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
{
	u32 selector;
	u32 dnode;
	u32 onode = tipc_own_addr(sock_net(sk));

	if (!tipc_msg_reverse(onode, &skb, err))
		return;

	/* Use the original sender's port as link selector */
	dnode = msg_destnode(buf_msg(skb));
	selector = msg_origport(buf_msg(skb));
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
}

P
Per Liden 已提交
235
/**
 * tsk_rej_rx_queue - reject all buffers in socket receive queue
 *
 * Each queued buffer is bounced back to its sender with TIPC_ERR_NO_PORT
 * (subject to the sender's "destination droppable" setting).
 *
 * Caller must hold socket lock
 */
static void tsk_rej_rx_queue(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
		tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
}

248 249
/* tipc_sk_connected - true if the socket is in the ESTABLISHED state */
static bool tipc_sk_connected(struct sock *sk)
{
	return sk->sk_state == TIPC_ESTABLISHED;
}

253 254 255 256 257 258 259 260 261 262
/* tipc_sk_type_connectionless - check if the socket is datagram socket
 * @sk: socket
 *
 * Returns true if connection less, false otherwise
 */
static bool tipc_sk_type_connectionless(struct sock *sk)
{
	return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM;
}

263
/* tsk_peer_msg - verify if message was sent by connected port's peer
 *
 * Handles cases where the node's network address has changed from
 * the default of <0.0.0> to its configured setting: a zero origin or
 * peer node address is accepted when the other side matches our own
 * configured address.
 */
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
	u32 peer_port = tsk_peer_port(tsk);
	u32 orig_node;
	u32 peer_node;

	/* Only an established connection has a valid peer to compare with */
	if (unlikely(!tipc_sk_connected(sk)))
		return false;

	if (unlikely(msg_origport(msg) != peer_port))
		return false;

	orig_node = msg_orignode(msg);
	peer_node = tsk_peer_node(tsk);

	if (likely(orig_node == peer_node))
		return true;

	/* Accept mismatch when either side still uses the default <0.0.0>
	 * address but the other equals our own configured node address
	 */
	if (!orig_node && (peer_node == tn->own_addr))
		return true;

	if (!peer_node && (orig_node == tn->own_addr))
		return true;

	return false;
}

297 298 299 300 301 302 303 304 305
/* tipc_set_sk_state - set the sk_state of the socket
 * @sk: socket
 *
 * Caller must hold socket lock
 *
 * Enforces the legal state transitions:
 *   any           -> OPEN
 *   OPEN          -> LISTEN | CONNECTING
 *   OPEN/CONNECTING           -> ESTABLISHED
 *   CONNECTING/ESTABLISHED    -> DISCONNECTING
 *
 * Returns 0 on success, -EINVAL for an illegal transition
 */
static int tipc_set_sk_state(struct sock *sk, int state)
{
	int oldsk_state = sk->sk_state;
	int res = -EINVAL;

	switch (state) {
	case TIPC_OPEN:
		res = 0;
		break;
	case TIPC_LISTEN:
	case TIPC_CONNECTING:
		if (oldsk_state == TIPC_OPEN)
			res = 0;
		break;
	case TIPC_ESTABLISHED:
		if (oldsk_state == TIPC_CONNECTING ||
		    oldsk_state == TIPC_OPEN)
			res = 0;
		break;
	case TIPC_DISCONNECTING:
		if (oldsk_state == TIPC_CONNECTING ||
		    oldsk_state == TIPC_ESTABLISHED)
			res = 0;
		break;
	}

	if (!res)
		sk->sk_state = state;

	return res;
}

336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
/* tipc_sk_sock_err - check whether a blocking operation should abort
 * @sock: socket to check
 * @timeout: remaining timeout; 0 means the operation must not block
 *
 * Returns 0 if it is OK to keep waiting, otherwise a negative errno:
 * a pending socket error, -EPIPE/-ENOTCONN for broken connection-oriented
 * sockets, -EAGAIN when the timeout is exhausted, or the signal errno
 * when a signal is pending.
 */
static int tipc_sk_sock_err(struct socket *sock, long *timeout)
{
	struct sock *sk = sock->sk;
	int err = sock_error(sk);
	int typ = sock->type;

	if (err)
		return err;
	if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) {
		if (sk->sk_state == TIPC_DISCONNECTING)
			return -EPIPE;
		else if (!tipc_sk_connected(sk))
			return -ENOTCONN;
	}
	if (!*timeout)
		return -EAGAIN;
	if (signal_pending(current))
		return sock_intr_errno(*timeout);

	return 0;
}

/* tipc_wait_for_cond - wait until condition_ is true, the socket hits an
 * error state (tipc_sk_sock_err()), the timeout expires, or a signal is
 * pending.  Evaluates to 0 when the condition was met, otherwise the
 * negative errno from tipc_sk_sock_err().
 *
 * Fix: the macro body previously read "sock->sk", silently capturing the
 * caller's local variable named 'sock' instead of the macro argument
 * 'sock_'; now all argument uses are the parameter itself, parenthesized.
 */
#define tipc_wait_for_cond(sock_, timeout_, condition_)			\
({								        \
	int rc_ = 0;							\
	int done_ = 0;							\
									\
	while (!(condition_) && !done_) {				\
		struct sock *sk_ = (sock_)->sk;				\
		DEFINE_WAIT_FUNC(wait_, woken_wake_function);		\
									\
		rc_ = tipc_sk_sock_err((sock_), (timeout_));		\
		if (rc_)						\
			break;						\
		prepare_to_wait(sk_sleep(sk_), &wait_,			\
				TASK_INTERRUPTIBLE);			\
		done_ = sk_wait_event(sk_, (timeout_),			\
				      (condition_), &wait_);		\
		remove_wait_queue(sk_sleep(sk_), &wait_);		\
	}								\
	rc_;								\
})

P
Per Liden 已提交
379
/**
 * tipc_sk_create - create a TIPC socket
 * @net: network namespace (must be default network)
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
 * @kern: caused by kernel or by userspace?
 *
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
{
	struct tipc_net *tn;
	const struct proto_ops *ops;
	struct sock *sk;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;

	/* Validate arguments */
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	/* Pick the proto_ops matching the requested socket type */
	switch (sock->type) {
	case SOCK_STREAM:
		ops = &stream_ops;
		break;
	case SOCK_SEQPACKET:
		ops = &packet_ops;
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
		ops = &msg_ops;
		break;
	default:
		return -EPROTOTYPE;
	}

	/* Allocate socket's protocol area */
	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
	if (sk == NULL)
		return -ENOMEM;

	/* Pre-build the header template used for every outgoing message */
	tsk = tipc_sk(sk);
	tsk->max_pkt = MAX_PKT_DEFAULT;
	INIT_LIST_HEAD(&tsk->publications);
	msg = &tsk->phdr;
	tn = net_generic(sock_net(sk), tipc_net_id);
	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
		      NAMED_H_SIZE, 0);

	/* Finish initializing socket data structures */
	sock->ops = ops;
	sock_init_data(sock, sk);
	tipc_set_sk_state(sk, TIPC_OPEN);
	if (tipc_sk_insert(tsk)) {
		/* NOTE(review): 'sk' allocated above is not released on this
		 * error path — looks like a leak; verify against upstream.
		 */
		pr_warn("Socket create failed; port number exhausted\n");
		return -EINVAL;
	}
	msg_set_origport(msg, tsk->portid);
	setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
	sk->sk_shutdown = 0;
	sk->sk_backlog_rcv = tipc_backlog_rcv;
	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
	sk->sk_data_ready = tipc_data_ready;
	sk->sk_write_space = tipc_write_space;
	sk->sk_destruct = tipc_sock_destruct;
	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
	atomic_set(&tsk->dupl_rcvcnt, 0);

	/* Start out with safe limits until we receive an advertised window */
	tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
	tsk->rcv_win = tsk->snd_win;

	if (tipc_sk_type_connectionless(sk)) {
		tsk_set_unreturnable(tsk, true);
		if (sock->type == SOCK_DGRAM)
			tsk_set_unreliable(tsk, true);
	}

	return 0;
}

464 465 466 467 468 469 470
/* tipc_sk_callback - RCU callback dropping the final socket reference
 * after the grace period following tipc_release()
 */
static void tipc_sk_callback(struct rcu_head *head)
{
	struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);

	sock_put(&tsk->sk);
}

471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
/* __tipc_shutdown - shut down a socket's receive side and connection
 * @sock: socket to shut down
 * @error: TIPC error code used when bouncing queued messages
 *
 * Caller should hold socket lock for the socket.
 */
static void __tipc_shutdown(struct socket *sock, int error)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	u32 dnode = tsk_peer_node(tsk);
	struct sk_buff *skb;

	/* Reject all unreceived messages, except on an active connection
	 * (which disconnects locally & sends a 'FIN+' to peer).
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* Partially read buffers are simply discarded */
		if (TIPC_SKB_CB(skb)->bytes_read) {
			kfree_skb(skb);
			continue;
		}
		if (!tipc_sk_type_connectionless(sk) &&
		    sk->sk_state != TIPC_DISCONNECTING) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			tipc_node_remove_conn(net, dnode, tsk->portid);
		}
		tipc_sk_respond(sk, skb, error);
	}

	/* Connectionless sockets have no connection to tear down */
	if (tipc_sk_type_connectionless(sk))
		return;

	/* Send a 'FIN-' style message to the peer if still connected */
	if (sk->sk_state != TIPC_DISCONNECTING) {
		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
				      TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
				      tsk_own_node(tsk), tsk_peer_port(tsk),
				      tsk->portid, error);
		if (skb)
			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
		tipc_node_remove_conn(net, dnode, tsk->portid);
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
	}
}

P
Per Liden 已提交
511
/**
 * tipc_release - destroy a TIPC socket
 * @sock: socket to destroy
 *
 * This routine cleans up any messages that are still queued on the socket.
 * For DGRAM and RDM socket types, all queued messages are rejected.
 * For SEQPACKET and STREAM socket types, the first message is rejected
 * and any others are discarded.  (If the first message on a STREAM socket
 * is partially-read, it is discarded and the next one is rejected instead.)
 *
 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 * are returned or discarded according to the "destination droppable" setting
 * specified for the message by the sender.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk;

	/*
	 * Exit if socket isn't fully initialized (occurs when a failed accept()
	 * releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
		return 0;

	tsk = tipc_sk(sk);
	lock_sock(sk);

	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
	sk->sk_shutdown = SHUTDOWN_MASK;
	tipc_sk_withdraw(tsk, 0, NULL);	/* withdraw all published names */
	sk_stop_timer(sk, &sk->sk_timer);
	tipc_sk_remove(tsk);		/* unhash from the socket table */

	/* Reject any messages that accumulated in backlog queue */
	release_sock(sk);

	/* Final sock_put() is deferred past the RCU grace period so that
	 * concurrent RCU lookups never see a freed socket
	 */
	call_rcu(&tsk->rcu, tipc_sk_callback);
	sock->sk = NULL;

	return 0;
}

/**
 * tipc_bind - associate or disassociate TIPC name(s) with a socket
 * @sock: socket structure
 * @uaddr: socket address describing name(s) and desired operation
 * @uaddr_len: size of socket address data structure
 *
 * Name and name sequence binding is indicated using a positive scope value;
 * a negative scope value unbinds the specified name.  Specifying no name
 * (i.e. a socket address length of 0) unbinds all names from the socket.
 *
 * Returns 0 on success, errno otherwise
 *
 * NOTE: this routine takes and releases the socket lock itself.
 */
static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
		     int uaddr_len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
	struct tipc_sock *tsk = tipc_sk(sk);
	int res = -EINVAL;

	lock_sock(sk);
	/* Zero-length address means: withdraw all names */
	if (unlikely(!uaddr_len)) {
		res = tipc_sk_withdraw(tsk, 0, NULL);
		goto exit;
	}

	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
		res = -EINVAL;
		goto exit;
	}
	if (addr->family != AF_TIPC) {
		res = -EAFNOSUPPORT;
		goto exit;
	}

	/* A single name is treated as a sequence of length one */
	if (addr->addrtype == TIPC_ADDR_NAME)
		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
	else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
		res = -EAFNOSUPPORT;
		goto exit;
	}

	/* Reserved name types may not be bound by ordinary sockets */
	if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
	    (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
	    (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
		res = -EACCES;
		goto exit;
	}

	/* Positive scope publishes; negative scope withdraws */
	res = (addr->scope > 0) ?
		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
exit:
	release_sock(sk);
	return res;
}

617
/**
 * tipc_getname - get port ID of socket or peer socket
 * @sock: socket structure
 * @uaddr: area for returned socket address
 * @uaddr_len: area for returned length of socket address
 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
 *
 * Returns 0 on success, errno otherwise
 *
 * NOTE: This routine doesn't need to take the socket lock since it only
 *       accesses socket information that is unchanging (or which changes in
 *       a completely predictable manner).
 */
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
			int *uaddr_len, int peer)
{
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);

	memset(addr, 0, sizeof(*addr));
	if (peer) {
		/* peer == 2 also accepts a recently disconnected peer */
		if ((!tipc_sk_connected(sk)) &&
		    ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
			return -ENOTCONN;
		addr->addr.id.ref = tsk_peer_port(tsk);
		addr->addr.id.node = tsk_peer_node(tsk);
	} else {
		addr->addr.id.ref = tsk->portid;
		addr->addr.id.node = tn->own_addr;
	}

	*uaddr_len = sizeof(*addr);
	addr->addrtype = TIPC_ADDR_ID;
	addr->family = AF_TIPC;
	addr->scope = 0;
	addr->addr.name.domain = 0;

	return 0;
}

/**
 * tipc_poll - read and possibly block on pollmask
 * @file: file structure associated with the socket
 * @sock: socket for which to calculate the poll bits
 * @wait: poll table (registers the caller on the socket's wait queue)
 *
 * Returns pollmask value
 *
 * COMMENTARY:
 * It appears that the usual socket locking mechanisms are not useful here
 * since the pollmask info is potentially out-of-date the moment this routine
 * exits.  TCP and other protocols seem to rely on higher level poll routines
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 * imply that the operation will succeed, merely that it should be performed
 * and will not block.
 */
static unsigned int tipc_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	u32 mask = 0;

	sock_poll_wait(file, sk_sleep(sk), wait);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	switch (sk->sk_state) {
	case TIPC_ESTABLISHED:
		/* Writable only when neither link nor peer is congested */
		if (!tsk->link_cong && !tsk_conn_cong(tsk))
			mask |= POLLOUT;
		/* fall thru' */
	case TIPC_LISTEN:
	case TIPC_CONNECTING:
		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= (POLLIN | POLLRDNORM);
		break;
	case TIPC_OPEN:
		if (!tsk->link_cong)
			mask |= POLLOUT;
		if (tipc_sk_type_connectionless(sk) &&
		    (!skb_queue_empty(&sk->sk_receive_queue)))
			mask |= (POLLIN | POLLRDNORM);
		break;
	case TIPC_DISCONNECTING:
		mask = (POLLIN | POLLRDNORM | POLLHUP);
		break;
	}

	return mask;
}

716 717 718 719
/**
 * tipc_sendmcast - send multicast message
 * @sock: socket structure
 * @seq: destination address
 * @msg: message to send
 * @dsz: total length of message data
 * @timeo: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
			  struct msghdr *msg, size_t dsz, long timeo)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	struct tipc_msg *mhdr = &tsk->phdr;
	struct sk_buff_head pktchain;
	/* Saved so the iterator can be rewound on -EMSGSIZE rebuild */
	struct iov_iter save = msg->msg_iter;
	uint mtu;
	int rc;

	/* Non-blocking send fails immediately under link congestion */
	if (!timeo && tsk->link_cong)
		return -ELINKCONG;

	/* Set up the multicast fields of the preformatted header */
	msg_set_type(mhdr, TIPC_MCAST_MSG);
	msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE);
	msg_set_destport(mhdr, 0);
	msg_set_destnode(mhdr, 0);
	msg_set_nametype(mhdr, seq->type);
	msg_set_namelower(mhdr, seq->lower);
	msg_set_nameupper(mhdr, seq->upper);
	msg_set_hdr_sz(mhdr, MCAST_H_SIZE);

	skb_queue_head_init(&pktchain);

new_mtu:
	mtu = tipc_bcast_get_mtu(net);
	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
	if (unlikely(rc < 0))
		return rc;

	do {
		rc = tipc_bcast_xmit(net, &pktchain);
		if (likely(!rc))
			return dsz;

		/* On congestion, sleep until the link clears, then retry */
		if (rc == -ELINKCONG) {
			tsk->link_cong = 1;
			rc = tipc_wait_for_cond(sock, &timeo, !tsk->link_cong);
			if (!rc)
				continue;
		}
		__skb_queue_purge(&pktchain);
		/* MTU may have changed; rewind the iterator and rebuild */
		if (rc == -EMSGSIZE) {
			msg->msg_iter = save;
			goto new_mtu;
		}
		break;
	} while (1);
	return rc;
}

780 781 782 783 784 785
/**
 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
 * @arrvq: queue with arriving messages, to be cloned after destination lookup
 * @inputq: queue with cloned messages, delivered to socket after dest lookup
 *
 * Multi-threaded: parallel calls with reference to same queues may occur
 */
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
		       struct sk_buff_head *inputq)
{
	struct tipc_msg *msg;
	struct tipc_plist dports;
	u32 portid;
	u32 scope = TIPC_CLUSTER_SCOPE;
	struct sk_buff_head tmpq;
	uint hsz;
	struct sk_buff *skb, *_skb;

	__skb_queue_head_init(&tmpq);
	tipc_plist_init(&dports);

	skb = tipc_skb_peek(arrvq, &inputq->lock);
	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
		msg = buf_msg(skb);
		hsz = skb_headroom(skb) + msg_hdr_sz(msg);

		/* Locally originated messages get node-local scope */
		if (in_own_node(net, msg_orignode(msg)))
			scope = TIPC_NODE_SCOPE;

		/* Create destination port list and message clones: */
		tipc_nametbl_mc_translate(net,
					  msg_nametype(msg), msg_namelower(msg),
					  msg_nameupper(msg), scope, &dports);
		portid = tipc_plist_pop(&dports);
		for (; portid; portid = tipc_plist_pop(&dports)) {
			_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
			if (_skb) {
				msg_set_destport(buf_msg(_skb), portid);
				__skb_queue_tail(&tmpq, _skb);
				continue;
			}
			pr_warn("Failed to clone mcast rcv buffer\n");
		}
		/* Append to inputq if not already done by other thread */
		spin_lock_bh(&inputq->lock);
		if (skb_peek(arrvq) == skb) {
			skb_queue_splice_tail_init(&tmpq, inputq);
			kfree_skb(__skb_dequeue(arrvq));
		}
		spin_unlock_bh(&inputq->lock);
		/* If another thread already spliced, drop our clones */
		__skb_queue_purge(&tmpq);
		kfree_skb(skb);
	}
	tipc_sk_rcv(net, inputq);
}

836 837 838
/**
 * tipc_sk_proto_rcv - receive a connection mng protocol message
 * @tsk: receiving socket
 * @skb: pointer to message buffer (consumed or requeued on @xmitq)
 * @xmitq: queue where a CONN_PROBE_REPLY is appended for transmission
 */
static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
			      struct sk_buff_head *xmitq)
{
	struct sock *sk = &tsk->sk;
	u32 onode = tsk_own_node(tsk);
	struct tipc_msg *hdr = buf_msg(skb);
	int mtyp = msg_type(hdr);
	bool conn_cong;

	/* Ignore if connection cannot be validated: */
	if (!tsk_peer_msg(tsk, hdr))
		goto exit;

	/* Any valid peer message counts as probe acknowledgment */
	tsk->probe_unacked = false;

	if (mtyp == CONN_PROBE) {
		msg_set_type(hdr, CONN_PROBE_REPLY);
		/* Reversed skb is queued for sending; do not free it here */
		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
			__skb_queue_tail(xmitq, skb);
		return;
	} else if (mtyp == CONN_ACK) {
		conn_cong = tsk_conn_cong(tsk);
		tsk->snt_unacked -= msg_conn_ack(hdr);
		if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
			tsk->snd_win = msg_adv_win(hdr);
		/* Wake up writer if the ack cleared the congestion */
		if (conn_cong)
			sk->sk_write_space(sk);
	} else if (mtyp != CONN_PROBE_REPLY) {
		pr_warn("Received unknown CONN_PROTO msg\n");
	}
exit:
	kfree_skb(skb);
}

P
Per Liden 已提交
875
/**
 * tipc_sendmsg - send message in connectionless manner
 * @sock: socket structure
 * @m: message to send
 * @dsz: amount of user data to be sent
 *
 * Message must have a destination specified explicitly.
 * Used for SOCK_RDM and SOCK_DGRAM messages,
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
 *
 * Returns the number of bytes sent on success, or errno otherwise
 */
static int tipc_sendmsg(struct socket *sock,
			struct msghdr *m, size_t dsz)
{
	struct sock *sk = sock->sk;
	int ret;

	/* Locked wrapper around __tipc_sendmsg() */
	lock_sock(sk);
	ret = __tipc_sendmsg(sock, m, dsz);
	release_sock(sk);

	return ret;
}

/* __tipc_sendmsg - send a connectionless message; caller holds the socket lock.
 *
 * Validates/resolves the destination address, builds a packet chain and
 * transmits it, retrying on link congestion or MTU change.
 */
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
{
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	struct tipc_msg *mhdr = &tsk->phdr;
	u32 dnode, dport;
	struct sk_buff_head pktchain;
	bool is_connectionless = tipc_sk_type_connectionless(sk);
	struct sk_buff *skb;
	struct tipc_name_seq *seq;
	struct iov_iter save;	/* for rewinding the iterator on rebuild */
	u32 mtu;
	long timeo;
	int rc;

	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;
	/* No explicit destination: fall back to address saved by connect() */
	if (unlikely(!dest)) {
		if (is_connectionless && tsk->peer.family == AF_TIPC)
			dest = &tsk->peer;
		else
			return -EDESTADDRREQ;
	} else if (unlikely(m->msg_namelen < sizeof(*dest)) ||
		   dest->family != AF_TIPC) {
		return -EINVAL;
	}
	/* Connection-oriented socket: only an implicit SYN is allowed here */
	if (!is_connectionless) {
		if (sk->sk_state == TIPC_LISTEN)
			return -EPIPE;
		if (sk->sk_state != TIPC_OPEN)
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
		if (dest->addrtype == TIPC_ADDR_NAME) {
			/* Remember name for later ancillary-data reporting */
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
		}
	}
	seq = &dest->addr.nameseq;
	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);

	if (dest->addrtype == TIPC_ADDR_MCAST) {
		return tipc_sendmcast(sock, seq, m, dsz, timeo);
	} else if (dest->addrtype == TIPC_ADDR_NAME) {
		u32 type = dest->addr.name.name.type;
		u32 inst = dest->addr.name.name.instance;
		u32 domain = dest->addr.name.domain;

		dnode = domain;
		msg_set_type(mhdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(mhdr, NAMED_H_SIZE);
		msg_set_nametype(mhdr, type);
		msg_set_nameinst(mhdr, inst);
		msg_set_lookup_scope(mhdr, tipc_addr_scope(domain));
		/* Resolve name to a concrete node/port */
		dport = tipc_nametbl_translate(net, type, inst, &dnode);
		msg_set_destnode(mhdr, dnode);
		msg_set_destport(mhdr, dport);
		if (unlikely(!dport && !dnode))
			return -EHOSTUNREACH;
	} else if (dest->addrtype == TIPC_ADDR_ID) {
		dnode = dest->addr.id.node;
		msg_set_type(mhdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(mhdr, 0);
		msg_set_destnode(mhdr, dnode);
		msg_set_destport(mhdr, dest->addr.id.ref);
		msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
	}

	skb_queue_head_init(&pktchain);
	save = m->msg_iter;
new_mtu:
	/* (Re)fragment the user data according to the current link MTU */
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
	if (rc < 0)
		return rc;

	do {
		skb = skb_peek(&pktchain);
		TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
		rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
		if (likely(!rc)) {
			/* Implicit connection setup for stream-type sockets */
			if (!is_connectionless)
				tipc_set_sk_state(sk, TIPC_CONNECTING);
			return dsz;
		}
		if (rc == -ELINKCONG) {
			/* Link congested: wait for it to clear, then retry */
			tsk->link_cong = 1;
			rc = tipc_wait_for_cond(sock, &timeo, !tsk->link_cong);
			if (!rc)
				continue;
		}
		__skb_queue_purge(&pktchain);
		if (rc == -EMSGSIZE) {
			/* MTU shrank: rewind iterator and rebuild the chain */
			m->msg_iter = save;
			goto new_mtu;
		}
		break;
	} while (1);

	return rc;
}

1005
/**
1006
 * tipc_send_stream - send stream-oriented data
P
Per Liden 已提交
1007
 * @sock: socket structure
1008 1009
 * @m: data to send
 * @dsz: total length of data to be transmitted
1010
 *
1011
 * Used for SOCK_STREAM data.
1012
 *
1013 1014
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
P
Per Liden 已提交
1015
 */
1016
static int tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_send_stream(sock, m, dsz);
	release_sock(sk);

	return ret;
}

/* __tipc_send_stream - send stream data; caller holds the socket lock.
 *
 * Transmits the user data in chunks of at most TIPC_MAX_USER_MSG_SIZE,
 * blocking on link or connection congestion as permitted by the timeout.
 */
static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *mhdr = &tsk->phdr;
	struct sk_buff_head pktchain;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	u32 portid = tsk->portid;
	int rc = -EINVAL;
	long timeo;
	u32 dnode;
	uint mtu, send, sent = 0;
	struct iov_iter save;	/* for rewinding iterator after MTU change */
	int hlen = MIN_H_SIZE;

	/* Handle implied connection establishment */
	if (unlikely(dest)) {
		rc = __tipc_sendmsg(sock, m, dsz);
		hlen = msg_hdr_sz(mhdr);
		/* Count an implicit SYN carrying data against the send window */
		if (dsz && (dsz == rc))
			tsk->snt_unacked = tsk_inc(tsk, dsz + hlen);
		return rc;
	}
	if (dsz > (uint)INT_MAX)
		return -EMSGSIZE;

	if (unlikely(!tipc_sk_connected(sk))) {
		if (sk->sk_state == TIPC_DISCONNECTING)
			return -EPIPE;
		else
			return -ENOTCONN;
	}

	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	/* Non-blocking send cannot wait out link congestion */
	if (!timeo && tsk->link_cong)
		return -ELINKCONG;

	dnode = tsk_peer_node(tsk);
	skb_queue_head_init(&pktchain);

next:
	save = m->msg_iter;
	mtu = tsk->max_pkt;
	/* Send at most one max-size user message per iteration */
	send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
	rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain);
	if (unlikely(rc < 0))
		return rc;

	do {
		if (likely(!tsk_conn_cong(tsk))) {
			rc = tipc_node_xmit(net, &pktchain, dnode, portid);
			if (likely(!rc)) {
				tsk->snt_unacked += tsk_inc(tsk, send + hlen);
				sent += send;
				if (sent == dsz)
					return dsz;
				goto next;
			}
			if (rc == -EMSGSIZE) {
				/* Link MTU changed: refetch it and rebuild */
				__skb_queue_purge(&pktchain);
				tsk->max_pkt = tipc_node_get_mtu(net, dnode,
								 portid);
				m->msg_iter = save;
				goto next;
			}
			if (rc != -ELINKCONG)
				break;

			tsk->link_cong = 1;
		}
		/* Block until both link and connection congestion clear */
		rc = tipc_wait_for_cond(sock, &timeo,
					(!tsk->link_cong &&
					 !tsk_conn_cong(tsk) &&
					 tipc_sk_connected(sk)));
	} while (!rc);

	__skb_queue_purge(&pktchain);
	/* Report partial success if any data went out before the error */
	return sent ? sent : rc;
}

1109
/**
1110
 * tipc_send_packet - send a connection-oriented message
P
Per Liden 已提交
1111
 * @sock: socket structure
1112 1113
 * @m: message to send
 * @dsz: length of data to be transmitted
1114
 *
1115
 * Used for SOCK_SEQPACKET messages.
1116
 *
1117
 * Returns the number of bytes sent on success, or errno otherwise
P
Per Liden 已提交
1118
 */
1119
static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
P
Per Liden 已提交
1120
{
1121 1122
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;
P
Per Liden 已提交
1123

1124
	return tipc_send_stream(sock, m, dsz);
P
Per Liden 已提交
1125 1126
}

1127
/* tipc_sk_finish_conn - complete the setup of a connection
 *
 * Binds the socket's template header to the peer, arms the connection
 * probe timer, registers the connection with the node layer and selects
 * the flow control mode according to the peer's capabilities.
 */
static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
				u32 peer_node)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *msg = &tsk->phdr;

	/* Preset the template header used for all subsequent data messages */
	msg_set_destnode(msg, peer_node);
	msg_set_destport(msg, peer_port);
	msg_set_type(msg, TIPC_CONN_MSG);
	msg_set_lookup_scope(msg, 0);
	msg_set_hdr_sz(msg, SHORT_H_SIZE);

	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
	tipc_set_sk_state(sk, TIPC_ESTABLISHED);
	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
	tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
		return;

	/* Fall back to message based flow control */
	tsk->rcv_win = FLOWCTL_MSG_WIN;
	tsk->snd_win = FLOWCTL_MSG_WIN;
}

/**
 * set_orig_addr - capture sender's address for received message
 * @m: descriptor for message info
 * @msg: received message header
1159
 *
P
Per Liden 已提交
1160 1161
 * Note: Address is not captured if not requested by receiver.
 */
S
Sam Ravnborg 已提交
1162
static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
P
Per Liden 已提交
1163
{
1164
	DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);
P
Per Liden 已提交
1165

1166
	if (addr) {
P
Per Liden 已提交
1167 1168
		addr->family = AF_TIPC;
		addr->addrtype = TIPC_ADDR_ID;
1169
		memset(&addr->addr, 0, sizeof(addr->addr));
P
Per Liden 已提交
1170 1171
		addr->addr.id.ref = msg_origport(msg);
		addr->addr.id.node = msg_orignode(msg);
1172 1173
		addr->addr.name.domain = 0;	/* could leave uninitialized */
		addr->scope = 0;		/* could leave uninitialized */
P
Per Liden 已提交
1174 1175 1176 1177 1178
		m->msg_namelen = sizeof(struct sockaddr_tipc);
	}
}

/**
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
 * @m: descriptor for message info
 * @msg: received message header
 * @tsk: TIPC port associated with message
 *
 * Note: Ancillary data is not captured if not requested by receiver.
 *
 * Returns 0 if successful, otherwise errno
 */
static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
				 struct tipc_sock *tsk)
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
	int has_name;
	int res;

	/* Receiver supplied no control buffer: nothing to do */
	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
			return res;
		if (anc_data[1]) {
			/* Also return the returned payload, if any */
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
		/* Name only known if the connection was set up by name */
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
		break;
	default:
		has_name = 0;
	}
	if (has_name) {
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}

	return 0;
}

1249
/* tipc_sk_send_ack - send a CONN_ACK acknowledging consumed data to the peer
 *
 * Resets the unacked-receive counter and, for block-based flow control
 * peers, advertises the current receive window.
 */
static void tipc_sk_send_ack(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct sk_buff *skb = NULL;
	struct tipc_msg *msg;
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);

	if (!tipc_sk_connected(sk))
		return;
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
	if (!skb)
		return;	/* best effort: allocation failure drops this ack */
	msg = buf_msg(skb);
	msg_set_conn_ack(msg, tsk->rcv_unacked);
	tsk->rcv_unacked = 0;

	/* Adjust to and advertize the correct window limit */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
		tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
		msg_set_adv_win(msg, tsk->rcv_win);
	}
	tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
}

1277
/* tipc_wait_for_rcvmsg - block until the receive queue is non-empty
 *
 * Caller holds the socket lock; it is dropped while sleeping. The
 * remaining timeout is written back through @timeop.
 *
 * Returns 0 when data is available, -ENOTCONN on receive shutdown,
 * -EAGAIN on timeout, or a signal-derived errno.
 */
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	long timeo = *timeop;
	int err;

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			if (sk->sk_shutdown & RCV_SHUTDOWN) {
				err = -ENOTCONN;
				break;
			}
			/* Drop the lock while sleeping */
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	*timeop = timeo;
	return err;
}

1310
/**
 * tipc_recvmsg - receive packet-oriented message
 * @sock: socket structure
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
			int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *buf;
	struct tipc_msg *msg;
	bool is_connectionless = tipc_sk_type_connectionless(sk);
	long timeo;
	unsigned int sz;
	u32 err;
	int res, hlen;

	/* Catch invalid receive requests */
	if (unlikely(!buf_len))
		return -EINVAL;

	lock_sock(sk);

	/* Connection-oriented socket must be past TIPC_OPEN to receive */
	if (!is_connectionless && unlikely(sk->sk_state == TIPC_OPEN)) {
		res = -ENOTCONN;
		goto exit;
	}

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
restart:

	/* Look for a message in receive queue; wait if necessary */
	res = tipc_wait_for_rcvmsg(sock, &timeo);
	if (res)
		goto exit;

	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
	hlen = msg_hdr_sz(msg);
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
		tsk_advance_rx_queue(sk);
		goto restart;
	}

	/* Capture sender's address (optional) */
	set_orig_addr(m, msg);

	/* Capture ancillary data (optional) */
	res = tipc_sk_anc_data_recv(m, msg, tsk);
	if (res)
		goto exit;

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
		if (unlikely(buf_len < sz)) {
			sz = buf_len;
			m->msg_flags |= MSG_TRUNC;
		}
		res = skb_copy_datagram_msg(buf, hlen, m, sz);
		if (res)
			goto exit;
		res = sz;
	} else {
		if (is_connectionless || err == TIPC_CONN_SHUTDOWN ||
		    m->msg_control)
			res = 0;
		else
			res = -ECONNRESET;
	}

	/* Peeking leaves the message on the queue and sends no ack */
	if (unlikely(flags & MSG_PEEK))
		goto exit;

	if (likely(!is_connectionless)) {
		/* Ack consumed data once a quarter of the window is used */
		tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
		if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
			tipc_sk_send_ack(tsk);
	}
	tsk_advance_rx_queue(sk);
exit:
	release_sock(sk);
	return res;
}

1406
/**
 * tipc_recv_stream - receive stream-oriented data
 * @sock: socket structure
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
			    size_t buf_len, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *buf;
	struct tipc_msg *msg;
	long timeo;
	unsigned int sz;
	int target;		/* minimum bytes wanted (SO_RCVLOWAT) */
	int sz_copied = 0;
	u32 err;
	int res = 0, hlen;

	/* Catch invalid receive attempts */
	if (unlikely(!buf_len))
		return -EINVAL;

	lock_sock(sk);

	if (unlikely(sk->sk_state == TIPC_OPEN)) {
		res = -ENOTCONN;
		goto exit;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

restart:
	/* Look for a message in receive queue; wait if necessary */
	res = tipc_wait_for_rcvmsg(sock, &timeo);
	if (res)
		goto exit;

	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
	hlen = msg_hdr_sz(msg);
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
		tsk_advance_rx_queue(sk);
		goto restart;
	}

	/* Optionally capture sender's address & ancillary data of first msg */
	if (sz_copied == 0) {
		set_orig_addr(m, msg);
		res = tipc_sk_anc_data_recv(m, msg, tsk);
		if (res)
			goto exit;
	}

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
		/* bytes_read tracks how much of this skb was consumed by
		 * earlier partial reads
		 */
		u32 offset = TIPC_SKB_CB(buf)->bytes_read;
		u32 needed;
		int sz_to_copy;

		sz -= offset;
		needed = (buf_len - sz_copied);
		sz_to_copy = min(sz, needed);

		res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
		if (res)
			goto exit;

		sz_copied += sz_to_copy;

		if (sz_to_copy < sz) {
			/* Buffer full: remember position within this skb */
			if (!(flags & MSG_PEEK))
				TIPC_SKB_CB(buf)->bytes_read =
					offset + sz_to_copy;
			goto exit;
		}
	} else {
		if (sz_copied != 0)
			goto exit; /* can't add error msg to valid data */

		if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
			res = 0;
		else
			res = -ECONNRESET;
	}

	if (unlikely(flags & MSG_PEEK))
		goto exit;

	/* Ack consumed data once a quarter of the window is used */
	tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
	if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
		tipc_sk_send_ack(tsk);
	tsk_advance_rx_queue(sk);

	/* Loop around if more data is required */
	if ((sz_copied < buf_len) &&	/* didn't get all requested data */
	    (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sz_copied < target)) &&	/* and more is ready or required */
	    (!err))			/* and haven't reached a FIN */
		goto restart;

exit:
	release_sock(sk);
	return sz_copied ? sz_copied : res;
}

1524 1525 1526 1527 1528 1529 1530 1531 1532 1533
/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	/* RCU protects the wait-queue structure against concurrent release */
	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 */
static void tipc_data_ready(struct sock *sk)
{
	struct socket_wq *wq;

	/* RCU protects the wait-queue structure against concurrent release */
	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
						POLLRDNORM | POLLRDBAND);
	rcu_read_unlock();
}

1557 1558 1559 1560 1561
/* tipc_sock_destruct - free any buffers still queued when the sock dies */
static void tipc_sock_destruct(struct sock *sk)
{
	__skb_queue_purge(&sk->sk_receive_queue);
}

1562 1563
/**
 * filter_connect - Handle all incoming messages for a connection-based socket
 * @tsk: TIPC socket
 * @skb: pointer to message buffer. Set to NULL if buffer is consumed
 *
 * Returns true if everything ok, false otherwise
 */
static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *hdr = buf_msg(skb);

	/* Multicast never belongs on a connection-based socket */
	if (unlikely(msg_mcast(hdr)))
		return false;

	switch (sk->sk_state) {
	case TIPC_CONNECTING:
		/* Accept only ACK or NACK message */
		if (unlikely(!msg_connected(hdr)))
			return false;

		/* NACK: connection refused by the peer */
		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = ECONNREFUSED;
			return true;
		}

		if (unlikely(!msg_isdata(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = EINVAL;
			return true;
		}

		tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
		msg_set_importance(&tsk->phdr, msg_importance(hdr));

		/* If 'ACK+' message, add to socket receive queue */
		if (msg_data_sz(hdr))
			return true;

		/* If empty 'ACK-' message, wake up sleeping connect() */
		if (waitqueue_active(sk_sleep(sk)))
			wake_up_interruptible(sk_sleep(sk));

		/* 'ACK-' message is neither accepted nor rejected: */
		msg_set_dest_droppable(hdr, 1);
		return false;

	case TIPC_OPEN:
	case TIPC_DISCONNECTING:
		break;
	case TIPC_LISTEN:
		/* Accept only SYN message */
		if (!msg_connected(hdr) && !(msg_errcode(hdr)))
			return true;
		break;
	case TIPC_ESTABLISHED:
		/* Accept only connection-based messages sent by peer */
		if (unlikely(!tsk_peer_msg(tsk, hdr)))
			return false;

		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			/* Let timer expire on its own */
			tipc_node_remove_conn(net, tsk_peer_node(tsk),
					      tsk->portid);
			sk->sk_state_change(sk);
		}
		return true;
	default:
		pr_err("Unknown sk_state %u\n", sk->sk_state);
	}

	return false;
}

1639 1640 1641
/**
 * rcvbuf_limit - get proper overload limit of socket receive queue
 * @sk: socket
1642
 * @skb: message
1643
 *
1644 1645
 * For connection oriented messages, irrespective of importance,
 * default queue limit is 2 MB.
1646
 *
1647 1648
 * For connectionless messages, queue limits are based on message
 * importance as follows:
1649
 *
1650 1651 1652 1653
 * TIPC_LOW_IMPORTANCE       (2 MB)
 * TIPC_MEDIUM_IMPORTANCE    (4 MB)
 * TIPC_HIGH_IMPORTANCE      (8 MB)
 * TIPC_CRITICAL_IMPORTANCE  (16 MB)
1654 1655 1656
 *
 * Returns overload limit according to corresponding message importance
 */
1657
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1658
{
1659 1660 1661 1662 1663
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);

	if (unlikely(!msg_connected(hdr)))
		return sk->sk_rcvbuf << msg_importance(hdr);
1664

1665 1666
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return sk->sk_rcvbuf;
1667

1668
	return FLOWCTL_MSG_LIM;
1669 1670
}

1671
/**
1672 1673
 * filter_rcv - validate incoming message
 * @sk: socket
1674
 * @skb: pointer to message.
1675
 *
1676 1677 1678
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
1679
 * Called with socket lock already taken
1680
 *
1681
 * Returns true if message was added to socket receive queue, otherwise false
P
Per Liden 已提交
1682
 */
J
Jon Paul Maloy 已提交
1683 1684
static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
		       struct sk_buff_head *xmitq)
P
Per Liden 已提交
1685
{
1686
	struct tipc_sock *tsk = tipc_sk(sk);
1687 1688 1689 1690
	struct tipc_msg *hdr = buf_msg(skb);
	unsigned int limit = rcvbuf_limit(sk, skb);
	int err = TIPC_OK;
	int usr = msg_user(hdr);
P
Per Liden 已提交
1691

1692
	if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
J
Jon Paul Maloy 已提交
1693
		tipc_sk_proto_rcv(tsk, skb, xmitq);
1694
		return false;
1695
	}
1696

1697 1698
	if (unlikely(usr == SOCK_WAKEUP)) {
		kfree_skb(skb);
1699 1700
		tsk->link_cong = 0;
		sk->sk_write_space(sk);
1701
		return false;
1702 1703
	}

1704 1705 1706 1707 1708
	/* Drop if illegal message type */
	if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) {
		kfree_skb(skb);
		return false;
	}
1709

1710
	/* Reject if wrong message type for current socket state */
1711
	if (tipc_sk_type_connectionless(sk)) {
1712 1713 1714 1715 1716 1717 1718
		if (msg_connected(hdr)) {
			err = TIPC_ERR_NO_PORT;
			goto reject;
		}
	} else if (unlikely(!filter_connect(tsk, skb))) {
		err = TIPC_ERR_NO_PORT;
		goto reject;
P
Per Liden 已提交
1719 1720 1721
	}

	/* Reject message if there isn't room to queue it */
1722 1723 1724 1725
	if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) {
		err = TIPC_ERR_OVERLOAD;
		goto reject;
	}
P
Per Liden 已提交
1726

1727
	/* Enqueue message */
1728
	TIPC_SKB_CB(skb)->bytes_read = 0;
1729 1730
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	skb_set_owner_r(skb, sk);
1731

1732
	sk->sk_data_ready(sk);
1733 1734 1735
	return true;

reject:
J
Jon Paul Maloy 已提交
1736 1737
	if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
		__skb_queue_tail(xmitq, skb);
1738
	return false;
1739
}
P
Per Liden 已提交
1740

1741
/**
 * tipc_backlog_rcv - handle incoming message from backlog queue
 * @sk: socket
 * @skb: message
 *
 * Caller must hold socket lock
 *
 * Returns 0
 */
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	unsigned int truesize = skb->truesize;
	struct sk_buff_head xmitq;
	u32 dnode, selector;

	__skb_queue_head_init(&xmitq);

	if (likely(filter_rcv(sk, skb, &xmitq))) {
		/* Accepted: account for bytes counted twice (backlog + rcvq) */
		atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
		return 0;
	}

	if (skb_queue_empty(&xmitq))
		return 0;

	/* Send response/rejected message */
	skb = __skb_dequeue(&xmitq);
	dnode = msg_destnode(buf_msg(skb));
	selector = msg_origport(buf_msg(skb));
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
	return 0;
}

1774
/**
 * tipc_sk_enqueue - extract all buffers with destination 'dport' from
 *                   inputq and try adding them to socket or backlog queue
 * @inputq: list of incoming buffers with potentially different destinations
 * @sk: socket where the buffers should be enqueued
 * @dport: port number for the socket
 * @xmitq: queue collecting protocol replies and rejected messages
 *
 * Caller must hold socket lock
 */
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
			    u32 dport, struct sk_buff_head *xmitq)
{
	/* Bound the time spent here so other sockets are not starved */
	unsigned long time_limit = jiffies + 2;
	struct sk_buff *skb;
	unsigned int lim;
	atomic_t *dcnt;
	u32 onode;

	while (skb_queue_len(inputq)) {
		if (unlikely(time_after_eq(jiffies, time_limit)))
			return;

		skb = tipc_skb_dequeue(inputq, dport);
		if (unlikely(!skb))
			return;

		/* Add message directly to receive queue if possible */
		if (!sock_owned_by_user(sk)) {
			filter_rcv(sk, skb, xmitq);
			continue;
		}

		/* Try backlog, compensating for double-counted bytes */
		dcnt = &tipc_sk(sk)->dupl_rcvcnt;
		if (!sk->sk_backlog.len)
			atomic_set(dcnt, 0);
		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
		if (likely(!sk_add_backlog(sk, skb, lim)))
			continue;

		/* Overload => reject message back to sender */
		onode = tipc_own_addr(sock_net(sk));
		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
			__skb_queue_tail(xmitq, skb);
		break;
	}
}

1822
/**
 * tipc_sk_rcv - handle a chain of incoming buffers
 * @net: network namespace the buffers belong to
 * @inputq: buffer list containing the buffers
 * Consumes all buffers in list until inputq is empty
 * Note: may be called in multiple threads referring to the same queue
 */
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
{
	struct sk_buff_head xmitq;
	u32 dnode, dport = 0;
	int err;
	struct tipc_sock *tsk;
	struct sock *sk;
	struct sk_buff *skb;

	__skb_queue_head_init(&xmitq);
	while (skb_queue_len(inputq)) {
		dport = tipc_skb_peek_port(inputq, dport);
		tsk = tipc_sk_lookup(net, dport);

		if (likely(tsk)) {
			sk = &tsk->sk;
			/* trylock: if another thread owns the lock it will
			 * drain the queue for this port itself
			 */
			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
				spin_unlock_bh(&sk->sk_lock.slock);
			}
			/* Send pending response/rejected messages, if any */
			while ((skb = __skb_dequeue(&xmitq))) {
				dnode = msg_destnode(buf_msg(skb));
				tipc_node_xmit_skb(net, skb, dnode, dport);
			}
			sock_put(sk);
			continue;
		}

		/* No destination socket => dequeue skb if still there */
		skb = tipc_skb_dequeue(inputq, dport);
		if (!skb)
			return;

		/* Try secondary lookup if unresolved named message */
		err = TIPC_ERR_NO_PORT;
		if (tipc_msg_lookup_dest(net, skb, &err))
			goto xmit;

		/* Prepare for message rejection */
		if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
			continue;
xmit:
		dnode = msg_destnode(buf_msg(skb));
		tipc_node_xmit_skb(net, skb, dnode, dport);
	}
}

Y
Ying Xue 已提交
1876 1877
/* tipc_wait_for_connect - block until the socket leaves TIPC_CONNECTING
 *
 * The remaining timeout is updated through @timeo_p by sk_wait_event().
 *
 * Returns 0 on state change, or a pending socket error, -ETIMEDOUT,
 * or a signal-derived errno.
 */
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = sock->sk;
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (!*timeo_p)
			return -ETIMEDOUT;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		add_wait_queue(sk_sleep(sk), &wait);
		done = sk_wait_event(sk, timeo_p,
				     sk->sk_state != TIPC_CONNECTING, &wait);
		remove_wait_queue(sk_sleep(sk), &wait);
	} while (!done);
	return 0;
}

P
Per Liden 已提交
1899
/**
1900
 * tipc_connect - establish a connection to another TIPC port
P
Per Liden 已提交
1901 1902 1903
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
1904
 * @flags: file-related flags associated with socket
P
Per Liden 已提交
1905 1906 1907
 *
 * Returns 0 on success, errno otherwise
 */
1908 1909
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
			int destlen, int flags)
P
Per Liden 已提交
1910
{
1911
	struct sock *sk = sock->sk;
1912
	struct tipc_sock *tsk = tipc_sk(sk);
1913 1914
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
1915
	long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
1916
	int previous;
1917
	int res = 0;
1918

1919 1920
	lock_sock(sk);

1921
	/* DGRAM/RDM connect(), just save the destaddr */
1922
	if (tipc_sk_type_connectionless(sk)) {
1923
		if (dst->family == AF_UNSPEC) {
1924
			memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
1925 1926
		} else if (destlen != sizeof(struct sockaddr_tipc)) {
			res = -EINVAL;
1927
		} else {
1928
			memcpy(&tsk->peer, dest, destlen);
1929
		}
1930 1931
		goto exit;
	}
1932 1933 1934 1935 1936 1937 1938

	/*
	 * Reject connection attempt using multicast address
	 *
	 * Note: send_msg() validates the rest of the address fields,
	 *       so there's no need to do it here
	 */
1939 1940 1941 1942 1943
	if (dst->addrtype == TIPC_ADDR_MCAST) {
		res = -EINVAL;
		goto exit;
	}

1944
	previous = sk->sk_state;
1945 1946 1947

	switch (sk->sk_state) {
	case TIPC_OPEN:
1948 1949 1950 1951 1952 1953 1954 1955 1956 1957
		/* Send a 'SYN-' to destination */
		m.msg_name = dest;
		m.msg_namelen = destlen;

		/* If connect is in non-blocking case, set MSG_DONTWAIT to
		 * indicate send_msg() is never blocked.
		 */
		if (!timeout)
			m.msg_flags = MSG_DONTWAIT;

1958
		res = __tipc_sendmsg(sock, &m, 0);
1959 1960 1961
		if ((res < 0) && (res != -EWOULDBLOCK))
			goto exit;

1962
		/* Just entered TIPC_CONNECTING state; the only
1963 1964 1965 1966
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		res = -EINPROGRESS;
1967 1968 1969 1970 1971
		/* fall thru' */
	case TIPC_CONNECTING:
		if (!timeout) {
			if (previous == TIPC_CONNECTING)
				res = -EALREADY;
Y
Ying Xue 已提交
1972
			goto exit;
1973
		}
Y
Ying Xue 已提交
1974 1975 1976
		timeout = msecs_to_jiffies(timeout);
		/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
		res = tipc_wait_for_connect(sock, &timeout);
1977 1978
		break;
	case TIPC_ESTABLISHED:
1979
		res = -EISCONN;
1980 1981
		break;
	default:
1982
		res = -EINVAL;
1983
	}
1984

1985 1986
exit:
	release_sock(sk);
1987
	return res;
P
Per Liden 已提交
1988 1989
}

1990
/**
1991
 * tipc_listen - allow socket to listen for incoming connections
P
Per Liden 已提交
1992 1993
 * @sock: socket structure
 * @len: (unused)
1994
 *
P
Per Liden 已提交
1995 1996
 * Returns 0 on success, errno otherwise
 */
1997
static int tipc_listen(struct socket *sock, int len)
P
Per Liden 已提交
1998
{
1999 2000 2001 2002
	struct sock *sk = sock->sk;
	int res;

	lock_sock(sk);
2003
	res = tipc_set_sk_state(sk, TIPC_LISTEN);
2004
	release_sock(sk);
2005

2006
	return res;
P
Per Liden 已提交
2007 2008
}

Y
Ying Xue 已提交
2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
/* tipc_wait_for_accept - block until a connection request (SYN) is queued
 * on the listening socket's receive queue, the timeout expires, or a
 * signal arrives. Caller holds the socket lock.
 */
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int err;

	/* True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	 */
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

2042
/**
2043
 * tipc_accept - wait for connection request
P
Per Liden 已提交
2044 2045 2046
 * @sock: listening socket
 * @newsock: new socket that is to be connected
 * @flags: file-related flags associated with socket
2047
 *
P
Per Liden 已提交
2048 2049
 * Returns 0 on success, errno otherwise
 */
2050
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
P
Per Liden 已提交
2051
{
2052
	struct sock *new_sk, *sk = sock->sk;
P
Per Liden 已提交
2053
	struct sk_buff *buf;
2054
	struct tipc_sock *new_tsock;
2055
	struct tipc_msg *msg;
Y
Ying Xue 已提交
2056
	long timeo;
2057
	int res;
P
Per Liden 已提交
2058

2059
	lock_sock(sk);
P
Per Liden 已提交
2060

2061
	if (sk->sk_state != TIPC_LISTEN) {
2062
		res = -EINVAL;
P
Per Liden 已提交
2063 2064
		goto exit;
	}
Y
Ying Xue 已提交
2065 2066 2067 2068
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	res = tipc_wait_for_accept(sock, timeo);
	if (res)
		goto exit;
2069 2070 2071

	buf = skb_peek(&sk->sk_receive_queue);

2072
	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
2073 2074
	if (res)
		goto exit;
2075
	security_sk_clone(sock->sk, new_sock->sk);
P
Per Liden 已提交
2076

2077
	new_sk = new_sock->sk;
2078
	new_tsock = tipc_sk(new_sk);
2079
	msg = buf_msg(buf);
P
Per Liden 已提交
2080

2081 2082 2083 2084 2085 2086 2087
	/* we lock on new_sk; but lockdep sees the lock on sk */
	lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);

	/*
	 * Reject any stray messages received by new socket
	 * before the socket lock was taken (very, very unlikely)
	 */
2088
	tsk_rej_rx_queue(new_sk);
2089 2090

	/* Connect new socket to it's peer */
2091
	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
2092

2093
	tsk_set_importance(new_tsock, msg_importance(msg));
2094
	if (msg_named(msg)) {
2095 2096
		new_tsock->conn_type = msg_nametype(msg);
		new_tsock->conn_instance = msg_nameinst(msg);
P
Per Liden 已提交
2097
	}
2098 2099 2100 2101 2102 2103 2104 2105

	/*
	 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
	 * Respond to 'SYN+' by queuing it on new socket.
	 */
	if (!msg_data_sz(msg)) {
		struct msghdr m = {NULL,};

2106
		tsk_advance_rx_queue(sk);
2107
		__tipc_send_stream(new_sock, &m, 0);
2108 2109 2110
	} else {
		__skb_dequeue(&sk->sk_receive_queue);
		__skb_queue_head(&new_sk->sk_receive_queue, buf);
2111
		skb_set_owner_r(buf, new_sk);
2112 2113
	}
	release_sock(new_sk);
P
Per Liden 已提交
2114
exit:
2115
	release_sock(sk);
P
Per Liden 已提交
2116 2117 2118 2119
	return res;
}

/**
2120
 * tipc_shutdown - shutdown socket connection
P
Per Liden 已提交
2121
 * @sock: socket structure
2122
 * @how: direction to close (must be SHUT_RDWR)
P
Per Liden 已提交
2123 2124
 *
 * Terminates connection (if necessary), then purges socket's receive queue.
2125
 *
P
Per Liden 已提交
2126 2127
 * Returns 0 on success, errno otherwise
 */
2128
static int tipc_shutdown(struct socket *sock, int how)
P
Per Liden 已提交
2129
{
2130
	struct sock *sk = sock->sk;
P
Per Liden 已提交
2131 2132
	int res;

2133 2134
	if (how != SHUT_RDWR)
		return -EINVAL;
P
Per Liden 已提交
2135

2136
	lock_sock(sk);
P
Per Liden 已提交
2137

2138 2139
	__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
	sk->sk_shutdown = SEND_SHUTDOWN;
P
Per Liden 已提交
2140

2141
	if (sk->sk_state == TIPC_DISCONNECTING) {
2142
		/* Discard any unreceived messages */
2143
		__skb_queue_purge(&sk->sk_receive_queue);
2144 2145 2146

		/* Wake up anyone sleeping in poll */
		sk->sk_state_change(sk);
P
Per Liden 已提交
2147
		res = 0;
2148
	} else {
P
Per Liden 已提交
2149 2150 2151
		res = -ENOTCONN;
	}

2152
	release_sock(sk);
P
Per Liden 已提交
2153 2154 2155
	return res;
}

2156
static void tipc_sk_timeout(unsigned long data)
2157
{
2158 2159
	struct tipc_sock *tsk = (struct tipc_sock *)data;
	struct sock *sk = &tsk->sk;
2160
	struct sk_buff *skb = NULL;
2161
	u32 peer_port, peer_node;
2162
	u32 own_node = tsk_own_node(tsk);
2163

J
Jon Paul Maloy 已提交
2164
	bh_lock_sock(sk);
2165
	if (!tipc_sk_connected(sk)) {
J
Jon Paul Maloy 已提交
2166 2167
		bh_unlock_sock(sk);
		goto exit;
2168
	}
2169 2170
	peer_port = tsk_peer_port(tsk);
	peer_node = tsk_peer_node(tsk);
2171

2172
	if (tsk->probe_unacked) {
2173
		if (!sock_owned_by_user(sk)) {
2174
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
2175 2176 2177 2178 2179 2180 2181 2182
			tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
					      tsk_peer_port(tsk));
			sk->sk_state_change(sk);
		} else {
			/* Try again later */
			sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
		}

2183 2184
		bh_unlock_sock(sk);
		goto exit;
2185
	}
2186 2187 2188 2189

	skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
			      INT_H_SIZE, 0, peer_node, own_node,
			      peer_port, tsk->portid, TIPC_OK);
2190
	tsk->probe_unacked = true;
2191
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
2192
	bh_unlock_sock(sk);
2193
	if (skb)
2194
		tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
J
Jon Paul Maloy 已提交
2195
exit:
2196
	sock_put(sk);
2197 2198
}

2199
/* tipc_sk_publish - bind a name sequence to this socket's port
 * @tsk: tipc socket
 * @scope: publication scope (node/cluster/zone)
 * @seq: name sequence to publish
 *
 * Returns 0 on success, errno otherwise.
 */
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq const *seq)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct publication *publ;
	u32 key;

	if (tipc_sk_connected(sk))
		return -EINVAL;
	/* Key must differ from the port id, which is reserved */
	key = tsk->portid + tsk->pub_count + 1;
	if (key == tsk->portid)
		return -EADDRINUSE;

	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
				    scope, tsk->portid, key);
	if (unlikely(!publ))
		return -EINVAL;

	list_add(&publ->pport_list, &tsk->publications);
	tsk->pub_count++;
	tsk->published = 1;
	return 0;
}

2224
/* tipc_sk_withdraw - unbind name sequence(s) from this socket's port
 * @tsk: tipc socket
 * @scope: publication scope to match
 * @seq: name sequence to withdraw, or NULL to withdraw all publications
 *
 * Returns 0 if at least one publication was withdrawn, -EINVAL otherwise.
 */
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq const *seq)
{
	struct net *net = sock_net(&tsk->sk);
	struct publication *publ;
	struct publication *safe;
	int rc = -EINVAL;

	list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
		if (seq) {
			/* Withdraw only the exact matching publication */
			if (publ->scope != scope)
				continue;
			if (publ->type != seq->type)
				continue;
			if (publ->lower != seq->lower)
				continue;
			if (publ->upper != seq->upper)
				break;
			tipc_nametbl_withdraw(net, publ->type, publ->lower,
					      publ->ref, publ->key);
			rc = 0;
			break;
		}
		/* seq == NULL: withdraw every publication */
		tipc_nametbl_withdraw(net, publ->type, publ->lower,
				      publ->ref, publ->key);
		rc = 0;
	}
	if (list_empty(&tsk->publications))
		tsk->published = 0;
	return rc;
}

2256 2257 2258
/* tipc_sk_reinit: set non-zero address in all existing sockets
 *                 when we go from standalone to network mode.
 */
2259
void tipc_sk_reinit(struct net *net)
2260
{
2261
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2262 2263 2264
	const struct bucket_table *tbl;
	struct rhash_head *pos;
	struct tipc_sock *tsk;
2265
	struct tipc_msg *msg;
2266
	int i;
2267

2268
	rcu_read_lock();
2269
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
2270 2271 2272 2273
	for (i = 0; i < tbl->size; i++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, i, node) {
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			msg = &tsk->phdr;
2274 2275
			msg_set_prevnode(msg, tn->own_addr);
			msg_set_orignode(msg, tn->own_addr);
2276 2277
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
2278
	}
2279
	rcu_read_unlock();
2280 2281
}

2282
/* tipc_sk_lookup - find a tipc socket by port id and take a reference
 * on it. Caller is responsible for the matching sock_put().
 */
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_sock *tsk;

	rcu_read_lock();
	tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params);
	if (tsk)
		sock_hold(&tsk->sk);
	rcu_read_unlock();

	return tsk;
}

2296
static int tipc_sk_insert(struct tipc_sock *tsk)
2297
{
2298 2299 2300
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2301 2302
	u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
	u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;
2303

2304 2305 2306 2307 2308 2309
	while (remaining--) {
		portid++;
		if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT))
			portid = TIPC_MIN_PORT;
		tsk->portid = portid;
		sock_hold(&tsk->sk);
2310 2311
		if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
						   tsk_rht_params))
2312 2313
			return 0;
		sock_put(&tsk->sk);
2314 2315
	}

2316
	return -1;
2317 2318
}

2319
static void tipc_sk_remove(struct tipc_sock *tsk)
2320
{
2321
	struct sock *sk = &tsk->sk;
2322
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
2323

2324
	if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
2325 2326
		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
2327 2328 2329
	}
}

2330 2331 2332 2333 2334 2335 2336
static const struct rhashtable_params tsk_rht_params = {
	.nelem_hint = 192,
	.head_offset = offsetof(struct tipc_sock, node),
	.key_offset = offsetof(struct tipc_sock, portid),
	.key_len = sizeof(u32), /* portid */
	.max_size = 1048576,
	.min_size = 256,
2337
	.automatic_shrinking = true,
2338 2339
};

2340
int tipc_sk_rht_init(struct net *net)
2341
{
2342
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2343 2344

	return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
2345 2346
}

2347
void tipc_sk_rht_destroy(struct net *net)
2348
{
2349 2350
	struct tipc_net *tn = net_generic(net, tipc_net_id);

2351 2352
	/* Wait for socket readers to complete */
	synchronize_net();
2353

2354
	rhashtable_destroy(&tn->sk_rht);
2355 2356
}

P
Per Liden 已提交
2357
/**
2358
 * tipc_setsockopt - set socket option
P
Per Liden 已提交
2359 2360 2361 2362 2363
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: pointer to new option value
 * @ol: length of option value
2364 2365
 *
 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
P
Per Liden 已提交
2366
 * (to ease compatibility).
2367
 *
P
Per Liden 已提交
2368 2369
 * Returns 0 on success, errno otherwise
 */
2370 2371
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, unsigned int ol)
P
Per Liden 已提交
2372
{
2373
	struct sock *sk = sock->sk;
2374
	struct tipc_sock *tsk = tipc_sk(sk);
P
Per Liden 已提交
2375 2376 2377
	u32 value;
	int res;

2378 2379
	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return 0;
P
Per Liden 已提交
2380 2381 2382 2383
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
	if (ol < sizeof(value))
		return -EINVAL;
2384 2385
	res = get_user(value, (u32 __user *)ov);
	if (res)
P
Per Liden 已提交
2386 2387
		return res;

2388
	lock_sock(sk);
2389

P
Per Liden 已提交
2390 2391
	switch (opt) {
	case TIPC_IMPORTANCE:
2392
		res = tsk_set_importance(tsk, value);
P
Per Liden 已提交
2393 2394 2395
		break;
	case TIPC_SRC_DROPPABLE:
		if (sock->type != SOCK_STREAM)
2396
			tsk_set_unreliable(tsk, value);
2397
		else
P
Per Liden 已提交
2398 2399 2400
			res = -ENOPROTOOPT;
		break;
	case TIPC_DEST_DROPPABLE:
2401
		tsk_set_unreturnable(tsk, value);
P
Per Liden 已提交
2402 2403
		break;
	case TIPC_CONN_TIMEOUT:
2404
		tipc_sk(sk)->conn_timeout = value;
2405
		/* no need to set "res", since already 0 at this point */
P
Per Liden 已提交
2406 2407 2408 2409 2410
		break;
	default:
		res = -EINVAL;
	}

2411 2412
	release_sock(sk);

P
Per Liden 已提交
2413 2414 2415 2416
	return res;
}

/**
2417
 * tipc_getsockopt - get socket option
P
Per Liden 已提交
2418 2419 2420 2421 2422
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: receptacle for option value
 * @ol: receptacle for length of option value
2423 2424
 *
 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
P
Per Liden 已提交
2425
 * (to ease compatibility).
2426
 *
P
Per Liden 已提交
2427 2428
 * Returns 0 on success, errno otherwise
 */
2429 2430
static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, int __user *ol)
P
Per Liden 已提交
2431
{
2432
	struct sock *sk = sock->sk;
2433
	struct tipc_sock *tsk = tipc_sk(sk);
2434
	int len;
P
Per Liden 已提交
2435
	u32 value;
2436
	int res;
P
Per Liden 已提交
2437

2438 2439
	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return put_user(0, ol);
P
Per Liden 已提交
2440 2441
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
2442 2443
	res = get_user(len, ol);
	if (res)
2444
		return res;
P
Per Liden 已提交
2445

2446
	lock_sock(sk);
P
Per Liden 已提交
2447 2448 2449

	switch (opt) {
	case TIPC_IMPORTANCE:
2450
		value = tsk_importance(tsk);
P
Per Liden 已提交
2451 2452
		break;
	case TIPC_SRC_DROPPABLE:
2453
		value = tsk_unreliable(tsk);
P
Per Liden 已提交
2454 2455
		break;
	case TIPC_DEST_DROPPABLE:
2456
		value = tsk_unreturnable(tsk);
P
Per Liden 已提交
2457 2458
		break;
	case TIPC_CONN_TIMEOUT:
2459
		value = tsk->conn_timeout;
2460
		/* no need to set "res", since already 0 at this point */
P
Per Liden 已提交
2461
		break;
2462
	case TIPC_NODE_RECVQ_DEPTH:
2463
		value = 0; /* was tipc_queue_size, now obsolete */
2464
		break;
2465
	case TIPC_SOCK_RECVQ_DEPTH:
2466 2467
		value = skb_queue_len(&sk->sk_receive_queue);
		break;
P
Per Liden 已提交
2468 2469 2470 2471
	default:
		res = -EINVAL;
	}

2472 2473
	release_sock(sk);

2474 2475
	if (res)
		return res;	/* "get" failed */
P
Per Liden 已提交
2476

2477 2478 2479 2480 2481 2482 2483
	if (len < sizeof(value))
		return -EINVAL;

	if (copy_to_user(ov, &value, sizeof(value)))
		return -EFAULT;

	return put_user(sizeof(value), ol);
P
Per Liden 已提交
2484 2485
}

2486
static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
E
Erik Hugne 已提交
2487
{
2488
	struct sock *sk = sock->sk;
E
Erik Hugne 已提交
2489 2490 2491 2492 2493 2494 2495
	struct tipc_sioc_ln_req lnr;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case SIOCGETLINKNAME:
		if (copy_from_user(&lnr, argp, sizeof(lnr)))
			return -EFAULT;
2496 2497
		if (!tipc_node_get_linkname(sock_net(sk),
					    lnr.bearer_id & 0xffff, lnr.peer,
E
Erik Hugne 已提交
2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508
					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
			if (copy_to_user(argp, &lnr, sizeof(lnr)))
				return -EFAULT;
			return 0;
		}
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

2509 2510
/* Protocol switches for the various types of TIPC sockets */

2511
static const struct proto_ops msg_ops = {
2512
	.owner		= THIS_MODULE,
P
Per Liden 已提交
2513
	.family		= AF_TIPC,
2514 2515 2516
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
2517
	.socketpair	= sock_no_socketpair,
2518
	.accept		= sock_no_accept,
2519 2520
	.getname	= tipc_getname,
	.poll		= tipc_poll,
E
Erik Hugne 已提交
2521
	.ioctl		= tipc_ioctl,
2522
	.listen		= sock_no_listen,
2523 2524 2525 2526 2527
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendmsg,
	.recvmsg	= tipc_recvmsg,
2528 2529
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
P
Per Liden 已提交
2530 2531
};

2532
static const struct proto_ops packet_ops = {
2533
	.owner		= THIS_MODULE,
P
Per Liden 已提交
2534
	.family		= AF_TIPC,
2535 2536 2537
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
2538
	.socketpair	= sock_no_socketpair,
2539 2540 2541
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
E
Erik Hugne 已提交
2542
	.ioctl		= tipc_ioctl,
2543 2544 2545 2546 2547 2548
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_packet,
	.recvmsg	= tipc_recvmsg,
2549 2550
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
P
Per Liden 已提交
2551 2552
};

2553
static const struct proto_ops stream_ops = {
2554
	.owner		= THIS_MODULE,
P
Per Liden 已提交
2555
	.family		= AF_TIPC,
2556 2557 2558
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
2559
	.socketpair	= sock_no_socketpair,
2560 2561 2562
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
E
Erik Hugne 已提交
2563
	.ioctl		= tipc_ioctl,
2564 2565 2566 2567 2568 2569
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_stream,
	.recvmsg	= tipc_recv_stream,
2570 2571
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
P
Per Liden 已提交
2572 2573
};

2574
static const struct net_proto_family tipc_family_ops = {
2575
	.owner		= THIS_MODULE,
P
Per Liden 已提交
2576
	.family		= AF_TIPC,
2577
	.create		= tipc_sk_create
P
Per Liden 已提交
2578 2579 2580 2581 2582
};

static struct proto tipc_proto = {
	.name		= "TIPC",
	.owner		= THIS_MODULE,
2583 2584
	.obj_size	= sizeof(struct tipc_sock),
	.sysctl_rmem	= sysctl_tipc_rmem
P
Per Liden 已提交
2585 2586 2587
};

/**
2588
 * tipc_socket_init - initialize TIPC socket interface
2589
 *
P
Per Liden 已提交
2590 2591
 * Returns 0 on success, errno otherwise
 */
2592
int tipc_socket_init(void)
P
Per Liden 已提交
2593 2594 2595
{
	int res;

2596
	res = proto_register(&tipc_proto, 1);
P
Per Liden 已提交
2597
	if (res) {
2598
		pr_err("Failed to register TIPC protocol type\n");
P
Per Liden 已提交
2599 2600 2601 2602 2603
		goto out;
	}

	res = sock_register(&tipc_family_ops);
	if (res) {
2604
		pr_err("Failed to register TIPC socket type\n");
P
Per Liden 已提交
2605 2606 2607 2608 2609 2610 2611 2612
		proto_unregister(&tipc_proto);
		goto out;
	}
 out:
	return res;
}

/**
2613
 * tipc_socket_stop - stop TIPC socket interface
P
Per Liden 已提交
2614
 */
2615
void tipc_socket_stop(void)
P
Per Liden 已提交
2616 2617 2618 2619
{
	sock_unregister(tipc_family_ops.family);
	proto_unregister(&tipc_proto);
}
2620 2621

/* Caller should hold socket lock for the passed tipc socket. */
2622
static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656
{
	u32 peer_node;
	u32 peer_port;
	struct nlattr *nest;

	peer_node = tsk_peer_node(tsk);
	peer_port = tsk_peer_port(tsk);

	nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);

	if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
		goto msg_full;
	if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
		goto msg_full;

	if (tsk->conn_type != 0) {
		if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
			goto msg_full;
	}
	nla_nest_end(skb, nest);

	return 0;

msg_full:
	nla_nest_cancel(skb, nest);

	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
2657 2658
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tipc_sock *tsk)
2659 2660 2661 2662
{
	int err;
	void *hdr;
	struct nlattr *attrs;
2663 2664
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2665
	struct sock *sk = &tsk->sk;
2666 2667

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
2668
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
2669 2670 2671 2672 2673 2674
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto genlmsg_cancel;
2675
	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid))
2676
		goto attr_msg_cancel;
2677
	if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
2678 2679
		goto attr_msg_cancel;

2680
	if (tipc_sk_connected(sk)) {
2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704
		err = __tipc_nl_add_sk_con(skb, tsk);
		if (err)
			goto attr_msg_cancel;
	} else if (!list_empty(&tsk->publications)) {
		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
			goto attr_msg_cancel;
	}
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	struct tipc_sock *tsk;
2705 2706
	const struct bucket_table *tbl;
	struct rhash_head *pos;
2707 2708
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2709 2710
	u32 tbl_id = cb->args[0];
	u32 prev_portid = cb->args[1];
2711

2712
	rcu_read_lock();
2713
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
2714 2715
	for (; tbl_id < tbl->size; tbl_id++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) {
2716
			spin_lock_bh(&tsk->sk.sk_lock.slock);
2717 2718 2719 2720 2721
			if (prev_portid && prev_portid != tsk->portid) {
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				continue;
			}

2722
			err = __tipc_nl_add_sk(skb, cb, tsk);
2723 2724 2725 2726 2727 2728
			if (err) {
				prev_portid = tsk->portid;
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				goto out;
			}
			prev_portid = 0;
2729 2730
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
2731
	}
2732
out:
2733
	rcu_read_unlock();
2734 2735
	cb->args[0] = tbl_id;
	cb->args[1] = prev_portid;
2736 2737 2738

	return skb->len;
}
2739 2740

/* Caller should hold socket lock for the passed tipc socket. */
2741 2742 2743
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 struct publication *publ)
2744 2745 2746 2747 2748
{
	void *hdr;
	struct nlattr *attrs;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
2749
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET);
2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_PUBL);
	if (!attrs)
		goto genlmsg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
2780 2781 2782
static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
				  struct netlink_callback *cb,
				  struct tipc_sock *tsk, u32 *last_publ)
2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822
{
	int err;
	struct publication *p;

	if (*last_publ) {
		list_for_each_entry(p, &tsk->publications, pport_list) {
			if (p->key == *last_publ)
				break;
		}
		if (p->key != *last_publ) {
			/* We never set seq or call nl_dump_check_consistent()
			 * this means that setting prev_seq here will cause the
			 * consistence check to fail in the netlink callback
			 * handler. Resulting in the last NLMSG_DONE message
			 * having the NLM_F_DUMP_INTR flag set.
			 */
			cb->prev_seq = 1;
			*last_publ = 0;
			return -EPIPE;
		}
	} else {
		p = list_first_entry(&tsk->publications, struct publication,
				     pport_list);
	}

	list_for_each_entry_from(p, &tsk->publications, pport_list) {
		err = __tipc_nl_add_sk_publ(skb, cb, p);
		if (err) {
			*last_publ = p->key;
			return err;
		}
	}
	*last_publ = 0;

	return 0;
}

int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
2823
	u32 tsk_portid = cb->args[0];
2824 2825
	u32 last_publ = cb->args[1];
	u32 done = cb->args[2];
2826
	struct net *net = sock_net(skb->sk);
2827 2828
	struct tipc_sock *tsk;

2829
	if (!tsk_portid) {
2830 2831 2832 2833 2834 2835 2836
		struct nlattr **attrs;
		struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];

		err = tipc_nlmsg_parse(cb->nlh, &attrs);
		if (err)
			return err;

2837 2838 2839
		if (!attrs[TIPC_NLA_SOCK])
			return -EINVAL;

2840 2841 2842 2843 2844 2845 2846 2847 2848
		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
				       attrs[TIPC_NLA_SOCK],
				       tipc_nl_sock_policy);
		if (err)
			return err;

		if (!sock[TIPC_NLA_SOCK_REF])
			return -EINVAL;

2849
		tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]);
2850 2851 2852 2853 2854
	}

	if (done)
		return 0;

2855
	tsk = tipc_sk_lookup(net, tsk_portid);
2856 2857 2858 2859 2860 2861 2862 2863
	if (!tsk)
		return -EINVAL;

	lock_sock(&tsk->sk);
	err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ);
	if (!err)
		done = 1;
	release_sock(&tsk->sk);
2864
	sock_put(&tsk->sk);
2865

2866
	cb->args[0] = tsk_portid;
2867 2868 2869 2870 2871
	cb->args[1] = last_publ;
	cb->args[2] = done;

	return skb->len;
}