socket.c 89.5 KB
Newer Older
P
Per Liden 已提交
1
/*
2
 * net/tipc/socket.c: TIPC socket API
3
 *
J
Jon Maloy 已提交
4
 * Copyright (c) 2001-2007, 2012-2017, Ericsson AB
5
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
P
Per Liden 已提交
6 7
 * All rights reserved.
 *
P
Per Liden 已提交
8
 * Redistribution and use in source and binary forms, with or without
P
Per Liden 已提交
9 10
 * modification, are permitted provided that the following conditions are met:
 *
P
Per Liden 已提交
11 12 13 14 15 16 17 18
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
P
Per Liden 已提交
19
 *
P
Per Liden 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
P
Per Liden 已提交
34 35 36
 * POSSIBILITY OF SUCH DAMAGE.
 */

37
#include <linux/rhashtable.h>
38 39
#include <linux/sched/signal.h>

P
Per Liden 已提交
40
#include "core.h"
41
#include "name_table.h"
E
Erik Hugne 已提交
42
#include "node.h"
43
#include "link.h"
44
#include "name_distr.h"
45
#include "socket.h"
46
#include "bcast.h"
47
#include "netlink.h"
J
Jon Maloy 已提交
48
#include "group.h"
49

50
#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
51
#define CONN_PROBING_INTV	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
52 53 54
#define TIPC_FWD_MSG		1
#define TIPC_MAX_PORT		0xffffffff
#define TIPC_MIN_PORT		1
55
#define TIPC_ACK_RATE		4       /* ACK at 1/4 of rcv window size */
56

57 58
/* Connection-oriented socket states, aliased onto the generic TCP state
 * values so that generic socket infrastructure (e.g. /proc reporting)
 * interprets sk->sk_state consistently.
 */
enum {
	TIPC_LISTEN = TCP_LISTEN,
	TIPC_ESTABLISHED = TCP_ESTABLISHED,
	TIPC_OPEN = TCP_CLOSE,
	TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
	TIPC_CONNECTING = TCP_SYN_SENT,
};

65 66 67 68 69
/* sockaddr_pair - pair of TIPC addresses: the socket's own address and
 * the address of a group member it communicates with
 */
struct sockaddr_pair {
	struct sockaddr_tipc sock;
	struct sockaddr_tipc member;
};

70 71 72 73 74 75 76
/**
 * struct tipc_sock - TIPC socket structure
 * @sk: socket - interacts with 'port' and with user via the socket API
 * @conn_type: TIPC type used when connection was established
 * @conn_instance: TIPC instance used when connection was established
 * @published: non-zero if port has one or more associated names
 * @max_pkt: maximum packet size "hint" used when building messages sent by port
 * @portid: unique port identity in TIPC socket hash table
 * @phdr: preformatted message header used when sending messages
 * @cong_links: list of congested links
 * @publications: list of publications for port
 * @pub_count: total # of publications port has made during its lifetime
 * @conn_timeout: the time we can wait for an unresponded setup request
 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
 * @probe_unacked: probe has been sent but not yet acked by peer
 * @cong_link_cnt: number of congested links
 * @snt_unacked: # messages sent by socket, and not yet acked by peer
 * @snd_win: peer's advertised send window (in flow control blocks)
 * @peer_caps: capability bits advertised by the peer
 * @rcv_unacked: # messages read by user, but not yet acked back to peer
 * @rcv_win: our advertised receive window (in flow control blocks)
 * @peer: 'connected' peer for dgram/rdm
 * @node: hash table node
 * @mc_method: cookie for use between socket and broadcast layer
 * @rcu: rcu struct for tipc_sock
 * @group: communication group this socket is a member of, if any
 * @group_is_open: group membership is fully established
 */
struct tipc_sock {
	struct sock sk;
	u32 conn_type;
	u32 conn_instance;
	int published;
	u32 max_pkt;
	u32 portid;
	struct tipc_msg phdr;
	struct list_head cong_links;
	struct list_head publications;
	u32 pub_count;
	uint conn_timeout;
	atomic_t dupl_rcvcnt;
	bool probe_unacked;
	u16 cong_link_cnt;
	u16 snt_unacked;
	u16 snd_win;
	u16 peer_caps;
	u16 rcv_unacked;
	u16 rcv_win;
	struct sockaddr_tipc peer;
	struct rhash_head node;
	struct tipc_mc_method mc_method;
	struct rcu_head rcu;
	struct tipc_group *group;
	bool group_is_open;
};
P
Per Liden 已提交
121

J
Jon Maloy 已提交
122
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
123
static void tipc_data_ready(struct sock *sk);
124
static void tipc_write_space(struct sock *sk);
125
static void tipc_sock_destruct(struct sock *sk);
126
static int tipc_release(struct socket *sock);
127 128
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
		       bool kern);
129
static void tipc_sk_timeout(struct timer_list *t);
130
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
131
			   struct tipc_name_seq const *seq);
132
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
133
			    struct tipc_name_seq const *seq);
J
Jon Maloy 已提交
134
static int tipc_sk_leave(struct tipc_sock *tsk);
135
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
136 137
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
138
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
139
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
P
Per Liden 已提交
140

141 142 143
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
P
Per Liden 已提交
144
static struct proto tipc_proto;
145 146
static const struct rhashtable_params tsk_rht_params;

147 148 149 150 151
/* tsk_own_node - own node number, as cached in the prebuilt header */
static u32 tsk_own_node(struct tipc_sock *tsk)
{
	struct tipc_msg *hdr = &tsk->phdr;

	return msg_prevnode(hdr);
}

152
static u32 tsk_peer_node(struct tipc_sock *tsk)
153
{
154
	return msg_destnode(&tsk->phdr);
155 156
}

157
static u32 tsk_peer_port(struct tipc_sock *tsk)
158
{
159
	return msg_destport(&tsk->phdr);
160 161
}

162
/* tsk_unreliable - true if outgoing messages may be dropped at the
 * source on congestion (src_droppable bit set in prebuilt header)
 */
static  bool tsk_unreliable(struct tipc_sock *tsk)
{
	return msg_src_droppable(&tsk->phdr) != 0;
}

167
/* tsk_set_unreliable - (dis)allow dropping of this socket's outgoing
 * messages at the source on congestion
 */
static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
{
	struct tipc_msg *hdr = &tsk->phdr;

	msg_set_src_droppable(hdr, unreliable ? 1 : 0);
}

172
/* tsk_unreturnable - true if undeliverable messages are dropped at the
 * destination instead of being returned to the sender
 */
static bool tsk_unreturnable(struct tipc_sock *tsk)
{
	return msg_dest_droppable(&tsk->phdr) != 0;
}

177
/* tsk_set_unreturnable - (dis)allow dropping of undeliverable messages
 * at the destination instead of returning them
 */
static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
{
	struct tipc_msg *hdr = &tsk->phdr;

	msg_set_dest_droppable(hdr, unreturnable ? 1 : 0);
}

182
/* tsk_importance - message importance level cached in prebuilt header */
static int tsk_importance(struct tipc_sock *tsk)
{
	return msg_importance(&tsk->phdr);
}

187
static int tsk_set_importance(struct tipc_sock *tsk, int imp)
188 189 190
{
	if (imp > TIPC_CRITICAL_IMPORTANCE)
		return -EINVAL;
191
	msg_set_importance(&tsk->phdr, (u32)imp);
192 193
	return 0;
}
194

195 196 197 198 199
/* tipc_sk - cast a generic sock to its enclosing tipc_sock */
static struct tipc_sock *tipc_sk(const struct sock *sk)
{
	return container_of(sk, struct tipc_sock, sk);
}

200
/* tsk_conn_cong - true if the connection's send window is exhausted */
static bool tsk_conn_cong(struct tipc_sock *tsk)
{
	return tsk->snt_unacked > tsk->snd_win;
}

205 206 207 208 209
/* tsk_blocks - convert a message length in bytes to the number of flow
 * control blocks it occupies (always at least one)
 */
static u16 tsk_blocks(int len)
{
	return ((len / FLOWCTL_BLK_SZ) + 1);
}

210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
/* tsk_adv_blocks(): translate a buffer size in bytes to number of
 * advertisable blocks, taking into account the ratio truesize(len)/len
 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
 */
static u16 tsk_adv_blocks(int len)
{
	return len / FLOWCTL_BLK_SZ / 4;
}

/* tsk_inc(): increment counter for sent or received data
 * - If block based flow control is not supported by peer we
 *   fall back to message based ditto, incrementing the counter
 *   by one per message regardless of length
 */
static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
{
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return ((msglen / FLOWCTL_BLK_SZ) + 1);
	return 1;
}

230
/**
231
 * tsk_advance_rx_queue - discard first buffer in socket receive queue
232 233
 *
 * Caller must hold socket lock
P
Per Liden 已提交
234
 */
235
static void tsk_advance_rx_queue(struct sock *sk)
P
Per Liden 已提交
236
{
237
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
P
Per Liden 已提交
238 239
}

240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
/* tipc_sk_respond() : send response message back to sender
 */
static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
{
	struct net *net = sock_net(sk);
	struct tipc_msg *hdr;

	/* Turn the message around; it is consumed if it cannot be reversed */
	if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
		return;

	hdr = buf_msg(skb);
	tipc_node_xmit_skb(net, skb, msg_destnode(hdr), msg_origport(hdr));
}

P
Per Liden 已提交
256
/**
257
 * tsk_rej_rx_queue - reject all buffers in socket receive queue
258 259
 *
 * Caller must hold socket lock
P
Per Liden 已提交
260
 */
261
static void tsk_rej_rx_queue(struct sock *sk)
P
Per Liden 已提交
262
{
263
	struct sk_buff *skb;
264

265 266
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
		tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
P
Per Liden 已提交
267 268
}

269 270
/* tipc_sk_connected - true if the socket is in ESTABLISHED state */
static bool tipc_sk_connected(struct sock *sk)
{
	return sk->sk_state == TIPC_ESTABLISHED;
}

274 275 276 277 278 279 280 281 282 283
/* tipc_sk_type_connectionless - check if the socket is datagram socket
 * @sk: socket
 *
 * Returns true if connection less, false otherwise
 */
static bool tipc_sk_type_connectionless(struct sock *sk)
{
	int typ = sk->sk_type;

	return typ == SOCK_RDM || typ == SOCK_DGRAM;
}

284
/* tsk_peer_msg - verify if message was sent by connected port's peer
 *
 * Handles cases where the node's network address has changed from
 * the default of <0.0.0> to its configured setting.
 */
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
{
	struct sock *sk = &tsk->sk;
	u32 self = tipc_own_addr(sock_net(sk));
	u32 peer_port = tsk_peer_port(tsk);
	u32 orig_node, peer_node;

	/* Only an established connection has a valid peer */
	if (unlikely(!tipc_sk_connected(sk)))
		return false;

	if (unlikely(msg_origport(msg) != peer_port))
		return false;

	orig_node = msg_orignode(msg);
	peer_node = tsk_peer_node(tsk);

	if (likely(orig_node == peer_node))
		return true;

	/* Message origin was recorded before our own address was known */
	if (!orig_node && peer_node == self)
		return true;

	/* Connection was set up before our own address was known */
	if (!peer_node && orig_node == self)
		return true;

	return false;
}

317 318 319 320 321 322 323 324 325
/* tipc_set_sk_state - set the sk_state of the socket
 * @sk: socket
 *
 * Caller must hold socket lock
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_set_sk_state(struct sock *sk, int state)
{
326
	int oldsk_state = sk->sk_state;
327 328 329
	int res = -EINVAL;

	switch (state) {
330 331 332
	case TIPC_OPEN:
		res = 0;
		break;
333
	case TIPC_LISTEN:
334
	case TIPC_CONNECTING:
335
		if (oldsk_state == TIPC_OPEN)
336 337
			res = 0;
		break;
338
	case TIPC_ESTABLISHED:
339
		if (oldsk_state == TIPC_CONNECTING ||
340
		    oldsk_state == TIPC_OPEN)
341 342
			res = 0;
		break;
343
	case TIPC_DISCONNECTING:
344
		if (oldsk_state == TIPC_CONNECTING ||
345 346 347
		    oldsk_state == TIPC_ESTABLISHED)
			res = 0;
		break;
348 349 350 351 352 353 354 355
	}

	if (!res)
		sk->sk_state = state;

	return res;
}

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
static int tipc_sk_sock_err(struct socket *sock, long *timeout)
{
	struct sock *sk = sock->sk;
	int err = sock_error(sk);
	int typ = sock->type;

	if (err)
		return err;
	if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) {
		if (sk->sk_state == TIPC_DISCONNECTING)
			return -EPIPE;
		else if (!tipc_sk_connected(sk))
			return -ENOTCONN;
	}
	if (!*timeout)
		return -EAGAIN;
	if (signal_pending(current))
		return sock_intr_errno(*timeout);

	return 0;
}

378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
/* tipc_wait_for_cond - wait until condition_ holds or an error occurs
 *
 * Releases the socket lock while sleeping and re-acquires it before
 * re-testing the condition; *timeo_ is decremented by the time slept.
 * Evaluates to 0 when the condition became true, or the errno from
 * tipc_sk_sock_err() (error, dead connection, timeout, signal).
 * Must be a macro: condition_ is re-evaluated on every iteration.
 */
#define tipc_wait_for_cond(sock_, timeo_, condition_)			       \
({                                                                             \
	struct sock *sk_;						       \
	int rc_;							       \
									       \
	while ((rc_ = !(condition_))) {					       \
		DEFINE_WAIT_FUNC(wait_, woken_wake_function);	               \
		sk_ = (sock_)->sk;					       \
		rc_ = tipc_sk_sock_err((sock_), timeo_);		       \
		if (rc_)						       \
			break;						       \
		prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE);    \
		release_sock(sk_);					       \
		*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
		sched_annotate_sleep();				               \
		lock_sock(sk_);						       \
		remove_wait_queue(sk_sleep(sk_), &wait_);		       \
	}								       \
	rc_;								       \
})

P
Per Liden 已提交
399
/**
 * tipc_sk_create - create a TIPC socket
 * @net: network namespace (must be default network)
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
 * @kern: caused by kernel or by userspace?
 *
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
{
	const struct proto_ops *ops;
	struct sock *sk;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;

	/* Validate arguments */
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	/* Select the ops vector matching the requested socket semantics */
	switch (sock->type) {
	case SOCK_STREAM:
		ops = &stream_ops;
		break;
	case SOCK_SEQPACKET:
		ops = &packet_ops;
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
		ops = &msg_ops;
		break;
	default:
		return -EPROTOTYPE;
	}

	/* Allocate socket's protocol area */
	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
	if (sk == NULL)
		return -ENOMEM;

	tsk = tipc_sk(sk);
	tsk->max_pkt = MAX_PKT_DEFAULT;
	INIT_LIST_HEAD(&tsk->publications);
	INIT_LIST_HEAD(&tsk->cong_links);
	msg = &tsk->phdr;

	/* Finish initializing socket data structures */
	sock->ops = ops;
	sock_init_data(sock, sk);
	tipc_set_sk_state(sk, TIPC_OPEN);
	if (tipc_sk_insert(tsk)) {
		/* NOTE(review): sk allocated above is not freed here;
		 * presumably reclaimed via the generic release path once
		 * sock_init_data() linked it to @sock - confirm no leak.
		 */
		pr_warn("Socket create failed; port number exhausted\n");
		return -EINVAL;
	}

	/* Ensure tsk is visible before we read own_addr. */
	smp_mb();

	tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE,
		      TIPC_NAMED_MSG, NAMED_H_SIZE, 0);

	msg_set_origport(msg, tsk->portid);
	timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
	sk->sk_shutdown = 0;
	sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
	sk->sk_data_ready = tipc_data_ready;
	sk->sk_write_space = tipc_write_space;
	sk->sk_destruct = tipc_sock_destruct;
	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
	tsk->group_is_open = true;
	atomic_set(&tsk->dupl_rcvcnt, 0);

	/* Start out with safe limits until we receive an advertised window */
	tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
	tsk->rcv_win = tsk->snd_win;

	/* Datagram sockets: never return undeliverable messages; plain
	 * SOCK_DGRAM may additionally be dropped at source on congestion
	 */
	if (tipc_sk_type_connectionless(sk)) {
		tsk_set_unreturnable(tsk, true);
		if (sock->type == SOCK_DGRAM)
			tsk_set_unreliable(tsk, true);
	}

	return 0;
}

489 490 491 492 493 494 495
/* tipc_sk_callback - RCU grace-period callback; drop the socket reference */
static void tipc_sk_callback(struct rcu_head *head)
{
	struct sock *sk = &container_of(head, struct tipc_sock, rcu)->sk;

	sock_put(sk);
}

496 497 498 499 500 501
/* __tipc_shutdown - shut down socket send/receive activity
 * @sock: socket to shut down
 * @error: TIPC error code used when rejecting queued messages
 *
 * Caller should hold socket lock for the socket.
 */
static void __tipc_shutdown(struct socket *sock, int error)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	long timeout = CONN_TIMEOUT_DEFAULT;
	u32 dnode = tsk_peer_node(tsk);
	struct sk_buff *skb;

	/* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */
	tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
					    !tsk_conn_cong(tsk)));

	/* Reject all unreceived messages, except on an active connection
	 * (which disconnects locally & sends a 'FIN+' to peer).
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* Partially read messages are only discarded, not rejected */
		if (TIPC_SKB_CB(skb)->bytes_read) {
			kfree_skb(skb);
			continue;
		}
		if (!tipc_sk_type_connectionless(sk) &&
		    sk->sk_state != TIPC_DISCONNECTING) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			tipc_node_remove_conn(net, dnode, tsk->portid);
		}
		tipc_sk_respond(sk, skb, error);
	}

	if (tipc_sk_type_connectionless(sk))
		return;

	/* Still connected: send a FIN to the peer and tear down locally */
	if (sk->sk_state != TIPC_DISCONNECTING) {
		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
				      TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
				      tsk_own_node(tsk), tsk_peer_port(tsk),
				      tsk->portid, error);
		if (skb)
			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
		tipc_node_remove_conn(net, dnode, tsk->portid);
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
	}
}

P
Per Liden 已提交
541
/**
 * tipc_release - destroy a TIPC socket
 * @sock: socket to destroy
 *
 * This routine cleans up any messages that are still queued on the socket.
 * For DGRAM and RDM socket types, all queued messages are rejected.
 * For SEQPACKET and STREAM socket types, the first message is rejected
 * and any others are discarded.  (If the first message on a STREAM socket
 * is partially-read, it is discarded and the next one is rejected instead.)
 *
 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 * are returned or discarded according to the "destination droppable" setting
 * specified for the message by the sender.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk;

	/*
	 * Exit if socket isn't fully initialized (occurs when a failed accept()
	 * releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
		return 0;

	tsk = tipc_sk(sk);
	lock_sock(sk);

	/* Disconnect and reject queued messages, unbind all names */
	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
	sk->sk_shutdown = SHUTDOWN_MASK;
	tipc_sk_leave(tsk);
	tipc_sk_withdraw(tsk, 0, NULL);
	sk_stop_timer(sk, &sk->sk_timer);
	tipc_sk_remove(tsk);

	sock_orphan(sk);
	/* Reject any messages that accumulated in backlog queue */
	release_sock(sk);
	tipc_dest_list_purge(&tsk->cong_links);
	tsk->cong_link_cnt = 0;
	/* Final sock_put() is deferred until after the RCU grace period */
	call_rcu(&tsk->rcu, tipc_sk_callback);
	sock->sk = NULL;

	return 0;
}

/**
 * tipc_bind - associate or disassociate TIPC name(s) with a socket
 * @sock: socket structure
 * @uaddr: socket address describing name(s) and desired operation
 * @uaddr_len: size of socket address data structure
 *
 * Name and name sequence binding is indicated using a positive scope value;
 * a negative scope value unbinds the specified name.  Specifying no name
 * (i.e. a socket address length of 0) unbinds all names from the socket.
 *
 * Returns 0 on success, errno otherwise
 *
 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 *       access any non-constant socket information.
 */
static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
		     int uaddr_len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
	struct tipc_sock *tsk = tipc_sk(sk);
	int res = -EINVAL;

	lock_sock(sk);
	/* Zero-length address means: withdraw all names */
	if (unlikely(!uaddr_len)) {
		res = tipc_sk_withdraw(tsk, 0, NULL);
		goto exit;
	}
	/* Group member sockets are bound via the group mechanism only */
	if (tsk->group) {
		res = -EACCES;
		goto exit;
	}
	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
		res = -EINVAL;
		goto exit;
	}
	if (addr->family != AF_TIPC) {
		res = -EAFNOSUPPORT;
		goto exit;
	}

	/* A single name is treated as a degenerate range */
	if (addr->addrtype == TIPC_ADDR_NAME)
		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
	else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
		res = -EAFNOSUPPORT;
		goto exit;
	}

	/* Reserved types are only bindable by the topology/config services */
	if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
	    (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
	    (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
		res = -EACCES;
		goto exit;
	}

	res = (addr->scope >= 0) ?
		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
exit:
	release_sock(sk);
	return res;
}

653
/**
654
 * tipc_getname - get port ID of socket or peer socket
P
Per Liden 已提交
655 656 657
 * @sock: socket structure
 * @uaddr: area for returned socket address
 * @uaddr_len: area for returned length of socket address
658
 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
659
 *
P
Per Liden 已提交
660
 * Returns 0 on success, errno otherwise
661
 *
662 663
 * NOTE: This routine doesn't need to take the socket lock since it only
 *       accesses socket information that is unchanging (or which changes in
664
 *       a completely predictable manner).
P
Per Liden 已提交
665
 */
666
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
667
			int peer)
P
Per Liden 已提交
668 669
{
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
670 671
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
P
Per Liden 已提交
672

673
	memset(addr, 0, sizeof(*addr));
674
	if (peer) {
675
		if ((!tipc_sk_connected(sk)) &&
676
		    ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
677
			return -ENOTCONN;
678 679
		addr->addr.id.ref = tsk_peer_port(tsk);
		addr->addr.id.node = tsk_peer_node(tsk);
680
	} else {
681
		addr->addr.id.ref = tsk->portid;
682
		addr->addr.id.node = tipc_own_addr(sock_net(sk));
683
	}
P
Per Liden 已提交
684 685 686 687 688 689

	addr->addrtype = TIPC_ADDR_ID;
	addr->family = AF_TIPC;
	addr->scope = 0;
	addr->addr.name.domain = 0;

690
	return sizeof(*addr);
P
Per Liden 已提交
691 692 693
}

/**
 * tipc_poll - read and possibly block on pollmask
 * @file: file structure associated with the socket
 * @sock: socket for which to calculate the poll bits
 * @wait: poll table, registered with sock_poll_wait()
 *
 * Returns pollmask value
 *
 * COMMENTARY:
 * It appears that the usual socket locking mechanisms are not useful here
 * since the pollmask info is potentially out-of-date the moment this routine
 * exits.  TCP and other protocols seem to rely on higher level poll routines
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 * imply that the operation will succeed, merely that it should be performed
 * and will not block.
 */
static __poll_t tipc_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	__poll_t revents = 0;

	sock_poll_wait(file, wait);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		revents |= EPOLLHUP;

	switch (sk->sk_state) {
	case TIPC_ESTABLISHED:
	case TIPC_CONNECTING:
		/* Writable unless a link or the connection is congested */
		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
			revents |= EPOLLOUT;
		/* fall thru' */
	case TIPC_LISTEN:
		if (!skb_queue_empty(&sk->sk_receive_queue))
			revents |= EPOLLIN | EPOLLRDNORM;
		break;
	case TIPC_OPEN:
		if (tsk->group_is_open && !tsk->cong_link_cnt)
			revents |= EPOLLOUT;
		/* Unconnected stream/seqpacket sockets are never readable */
		if (!tipc_sk_type_connectionless(sk))
			break;
		if (skb_queue_empty(&sk->sk_receive_queue))
			break;
		revents |= EPOLLIN | EPOLLRDNORM;
		break;
	case TIPC_DISCONNECTING:
		revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP;
		break;
	}
	return revents;
}

751 752 753 754
/**
 * tipc_sendmcast - send multicast message
 * @sock: socket structure
 * @seq: destination address
 * @msg: message to send
 * @dlen: length of data to send
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
			  struct msghdr *msg, size_t dlen, long timeout)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	int mtu = tipc_bcast_get_mtu(net);
	struct tipc_mc_method *method = &tsk->mc_method;
	struct sk_buff_head pkts;
	struct tipc_nlist dsts;
	int rc;

	/* Group members must use the group send primitives */
	if (tsk->group)
		return -EACCES;

	/* Block or return if any destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;

	/* Lookup destination nodes */
	tipc_nlist_init(&dsts, tipc_own_addr(net));
	tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
				      seq->upper, &dsts);
	if (!dsts.local && !dsts.remote)
		return -EHOSTUNREACH;

	/* Build message header */
	msg_set_type(hdr, TIPC_MCAST_MSG);
	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_nametype(hdr, seq->type);
	msg_set_namelower(hdr, seq->lower);
	msg_set_nameupper(hdr, seq->upper);

	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);

	/* Send message if build was successful */
	if (unlikely(rc == dlen))
		rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
				     &tsk->cong_link_cnt);

	tipc_nlist_purge(&dsts);

	return rc ? rc : dlen;
}

814 815 816 817 818 819 820 821 822 823 824 825 826
/**
 * tipc_send_group_msg - send a message to a member in the group
 * @net: network namespace
 * @m: message to send
 * @mb: group member
 * @dnode: destination node
 * @dport: destination port
 * @dlen: total length of message data
 *
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
			       struct msghdr *m, struct tipc_member *mb,
			       u32 dnode, u32 dport, int dlen)
{
	u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
	struct tipc_mc_method *method = &tsk->mc_method;
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_msg *hdr = &tsk->phdr;
	struct sk_buff_head pkts;
	int mtu, rc;

	/* Complete message header */
	msg_set_type(hdr, TIPC_GRP_UCAST_MSG);
	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
	msg_set_destport(hdr, dport);
	msg_set_destnode(hdr, dnode);
	msg_set_grp_bc_seqno(hdr, bc_snd_nxt);

	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	/* Send message */
	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
		tipc_dest_push(&tsk->cong_links, dnode, 0);
		tsk->cong_link_cnt++;
	}

	/* Update send window */
	tipc_group_update_member(mb, blks);

	/* A broadcast sent within next EXPIRE period must follow same path */
	method->rcast = true;
	method->mandatory = true;
	return dlen;
}

/**
 * tipc_send_group_unicast - send message to a member in the group
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
				   int dlen, long timeout)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct net *net = sock_net(sk);
	struct tipc_member *mb = NULL;
	u32 node, port;
	int rc;

	node = dest->addr.id.node;
	port = dest->addr.id.ref;
	if (!port && !node)
		return -EHOSTUNREACH;

	/* Block or return if destination link or member is congested */
	rc = tipc_wait_for_cond(sock, &timeout,
				!tipc_dest_find(&tsk->cong_links, node, 0) &&
				!tipc_group_cong(grp, node, port, blks, &mb));
	if (unlikely(rc))
		return rc;

	/* Destination may have left the group while we were waiting */
	if (unlikely(!mb))
		return -EHOSTUNREACH;

	rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen);

	return rc ? rc : dlen;
}

907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925
/**
 * tipc_send_group_anycast - send message to any member with given identity
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
				   int dlen, long timeout)
{
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct list_head *cong_links = &tsk->cong_links;
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_group *grp = tsk->group;
926
	struct tipc_msg *hdr = &tsk->phdr;
927 928 929 930 931
	struct tipc_member *first = NULL;
	struct tipc_member *mbr = NULL;
	struct net *net = sock_net(sk);
	u32 node, port, exclude;
	struct list_head dsts;
932
	u32 type, inst, scope;
933 934 935 936 937 938
	int lookups = 0;
	int dstcnt, rc;
	bool cong;

	INIT_LIST_HEAD(&dsts);

939
	type = msg_nametype(hdr);
940
	inst = dest->addr.name.name.instance;
941
	scope = msg_lookup_scope(hdr);
942 943 944 945 946 947 948
	exclude = tipc_group_exclude(grp);

	while (++lookups < 4) {
		first = NULL;

		/* Look for a non-congested destination member, if any */
		while (1) {
949
			if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989
						 &dstcnt, exclude, false))
				return -EHOSTUNREACH;
			tipc_dest_pop(&dsts, &node, &port);
			cong = tipc_group_cong(grp, node, port, blks, &mbr);
			if (!cong)
				break;
			if (mbr == first)
				break;
			if (!first)
				first = mbr;
		}

		/* Start over if destination was not in member list */
		if (unlikely(!mbr))
			continue;

		if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
			break;

		/* Block or return if destination link or member is congested */
		rc = tipc_wait_for_cond(sock, &timeout,
					!tipc_dest_find(cong_links, node, 0) &&
					!tipc_group_cong(grp, node, port,
							 blks, &mbr));
		if (unlikely(rc))
			return rc;

		/* Send, unless destination disappeared while waiting */
		if (likely(mbr))
			break;
	}

	if (unlikely(lookups >= 4))
		return -EHOSTUNREACH;

	rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);

	return rc ? rc : dlen;
}

J
Jon Maloy 已提交
990 991 992 993 994 995 996 997 998 999 1000 1001 1002
/**
 * tipc_send_group_bcast - send message to all members in communication group
 * @sk: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
				 int dlen, long timeout)
{
1003
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
J
Jon Maloy 已提交
1004 1005 1006 1007 1008 1009
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_nlist *dsts = tipc_group_dests(grp);
	struct tipc_mc_method *method = &tsk->mc_method;
1010
	bool ack = method->mandatory && method->rcast;
1011
	int blks = tsk_blocks(MCAST_H_SIZE + dlen);
J
Jon Maloy 已提交
1012 1013 1014 1015 1016 1017 1018 1019
	struct tipc_msg *hdr = &tsk->phdr;
	int mtu = tipc_bcast_get_mtu(net);
	struct sk_buff_head pkts;
	int rc = -EHOSTUNREACH;

	if (!dsts->local && !dsts->remote)
		return -EHOSTUNREACH;

1020 1021 1022
	/* Block or return if any destination link or member is congested */
	rc = tipc_wait_for_cond(sock, &timeout,	!tsk->cong_link_cnt &&
				!tipc_group_bc_cong(grp, blks));
J
Jon Maloy 已提交
1023 1024 1025 1026
	if (unlikely(rc))
		return rc;

	/* Complete message header */
1027 1028 1029 1030 1031 1032 1033
	if (dest) {
		msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
		msg_set_nameinst(hdr, dest->addr.name.name.instance);
	} else {
		msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
		msg_set_nameinst(hdr, 0);
	}
1034
	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
J
Jon Maloy 已提交
1035 1036 1037 1038
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));

1039 1040 1041
	/* Avoid getting stuck with repeated forced replicasts */
	msg_set_grp_bc_ack_req(hdr, ack);

J
Jon Maloy 已提交
1042 1043 1044 1045 1046 1047 1048
	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	/* Send message */
1049
	rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
J
Jon Maloy 已提交
1050 1051 1052
	if (unlikely(rc))
		return rc;

1053
	/* Update broadcast sequence number and send windows */
1054 1055 1056 1057 1058 1059
	tipc_group_update_bc_members(tsk->group, blks, ack);

	/* Broadcast link is now free to choose method for next broadcast */
	method->mandatory = false;
	method->expires = jiffies;

J
Jon Maloy 已提交
1060 1061 1062
	return dlen;
}

1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079
/**
 * tipc_send_group_mcast - send message to all members with given identity
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
				 int dlen, long timeout)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
1080
	struct tipc_msg *hdr = &tsk->phdr;
1081
	struct net *net = sock_net(sk);
1082
	u32 type, inst, scope, exclude;
1083
	struct list_head dsts;
1084
	u32 dstcnt;
1085 1086 1087

	INIT_LIST_HEAD(&dsts);

1088 1089 1090
	type = msg_nametype(hdr);
	inst = dest->addr.name.name.instance;
	scope = msg_lookup_scope(hdr);
1091
	exclude = tipc_group_exclude(grp);
1092 1093 1094

	if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
				 &dstcnt, exclude, true))
1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105
		return -EHOSTUNREACH;

	if (dstcnt == 1) {
		tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
		return tipc_send_group_unicast(sock, m, dlen, timeout);
	}

	tipc_dest_list_purge(&dsts);
	return tipc_send_group_bcast(sock, m, dlen, timeout);
}

1106 1107 1108 1109 1110 1111
/**
 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
 * @arrvq: queue with arriving messages, to be cloned after destination lookup
 * @inputq: queue with cloned messages, delivered to socket after dest lookup
 *
 * Multi-threaded: parallel calls with reference to same queues may occur
1112
 */
1113 1114
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
		       struct sk_buff_head *inputq)
1115
{
J
Jon Maloy 已提交
1116
	u32 self = tipc_own_addr(net);
1117
	u32 type, lower, upper, scope;
1118
	struct sk_buff *skb, *_skb;
1119
	u32 portid, onode;
1120
	struct sk_buff_head tmpq;
J
Jon Maloy 已提交
1121
	struct list_head dports;
1122 1123 1124
	struct tipc_msg *hdr;
	int user, mtyp, hlen;
	bool exact;
1125

1126
	__skb_queue_head_init(&tmpq);
1127
	INIT_LIST_HEAD(&dports);
1128

1129 1130
	skb = tipc_skb_peek(arrvq, &inputq->lock);
	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
1131 1132 1133 1134 1135 1136 1137
		hdr = buf_msg(skb);
		user = msg_user(hdr);
		mtyp = msg_type(hdr);
		hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
		onode = msg_orignode(hdr);
		type = msg_nametype(hdr);

1138 1139 1140 1141 1142 1143
		if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
			spin_lock_bh(&inputq->lock);
			if (skb_peek(arrvq) == skb) {
				__skb_dequeue(arrvq);
				__skb_queue_tail(inputq, skb);
			}
J
Jon Maloy 已提交
1144
			kfree_skb(skb);
1145 1146 1147
			spin_unlock_bh(&inputq->lock);
			continue;
		}
1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163

		/* Group messages require exact scope match */
		if (msg_in_group(hdr)) {
			lower = 0;
			upper = ~0;
			scope = msg_lookup_scope(hdr);
			exact = true;
		} else {
			/* TIPC_NODE_SCOPE means "any scope" in this context */
			if (onode == self)
				scope = TIPC_NODE_SCOPE;
			else
				scope = TIPC_CLUSTER_SCOPE;
			exact = false;
			lower = msg_namelower(hdr);
			upper = msg_nameupper(hdr);
J
Jon Maloy 已提交
1164
		}
1165 1166 1167 1168 1169 1170

		/* Create destination port list: */
		tipc_nametbl_mc_lookup(net, type, lower, upper,
				       scope, exact, &dports);

		/* Clone message per destination */
J
Jon Maloy 已提交
1171
		while (tipc_dest_pop(&dports, NULL, &portid)) {
1172
			_skb = __pskb_copy(skb, hlen, GFP_ATOMIC);
1173 1174 1175 1176 1177 1178
			if (_skb) {
				msg_set_destport(buf_msg(_skb), portid);
				__skb_queue_tail(&tmpq, _skb);
				continue;
			}
			pr_warn("Failed to clone mcast rcv buffer\n");
1179
		}
1180 1181 1182 1183 1184 1185 1186 1187 1188
		/* Append to inputq if not already done by other thread */
		spin_lock_bh(&inputq->lock);
		if (skb_peek(arrvq) == skb) {
			skb_queue_splice_tail_init(&tmpq, inputq);
			kfree_skb(__skb_dequeue(arrvq));
		}
		spin_unlock_bh(&inputq->lock);
		__skb_queue_purge(&tmpq);
		kfree_skb(skb);
1189
	}
1190
	tipc_sk_rcv(net, inputq);
1191 1192
}

1193
/**
J
Jon Maloy 已提交
1194
 * tipc_sk_conn_proto_rcv - receive a connection mng protocol message
1195
 * @tsk: receiving socket
1196
 * @skb: pointer to message buffer.
1197
 */
J
Jon Maloy 已提交
1198 1199
static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
				   struct sk_buff_head *xmitq)
1200
{
1201
	struct tipc_msg *hdr = buf_msg(skb);
J
Jon Maloy 已提交
1202 1203
	u32 onode = tsk_own_node(tsk);
	struct sock *sk = &tsk->sk;
1204
	int mtyp = msg_type(hdr);
1205
	bool conn_cong;
1206

1207
	/* Ignore if connection cannot be validated: */
1208
	if (!tsk_peer_msg(tsk, hdr))
1209 1210
		goto exit;

1211 1212 1213 1214 1215 1216 1217 1218
	if (unlikely(msg_errcode(hdr))) {
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
		tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
				      tsk_peer_port(tsk));
		sk->sk_state_change(sk);
		goto exit;
	}

1219
	tsk->probe_unacked = false;
1220

1221 1222
	if (mtyp == CONN_PROBE) {
		msg_set_type(hdr, CONN_PROBE_REPLY);
J
Jon Paul Maloy 已提交
1223 1224
		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
			__skb_queue_tail(xmitq, skb);
1225 1226
		return;
	} else if (mtyp == CONN_ACK) {
1227
		conn_cong = tsk_conn_cong(tsk);
1228 1229 1230
		tsk->snt_unacked -= msg_conn_ack(hdr);
		if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
			tsk->snd_win = msg_adv_win(hdr);
1231
		if (conn_cong)
1232 1233 1234
			sk->sk_write_space(sk);
	} else if (mtyp != CONN_PROBE_REPLY) {
		pr_warn("Received unknown CONN_PROTO msg\n");
1235 1236
	}
exit:
1237
	kfree_skb(skb);
1238 1239
}

P
Per Liden 已提交
1240
/**
1241
 * tipc_sendmsg - send message in connectionless manner
P
Per Liden 已提交
1242 1243
 * @sock: socket structure
 * @m: message to send
1244
 * @dsz: amount of user data to be sent
1245
 *
P
Per Liden 已提交
1246
 * Message must have an destination specified explicitly.
1247
 * Used for SOCK_RDM and SOCK_DGRAM messages,
P
Per Liden 已提交
1248 1249
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
1250
 *
P
Per Liden 已提交
1251 1252
 * Returns the number of bytes sent on success, or errno otherwise
 */
1253
static int tipc_sendmsg(struct socket *sock,
1254
			struct msghdr *m, size_t dsz)
1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_sendmsg(sock, m, dsz);
	release_sock(sk);

	return ret;
}

1266
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
P
Per Liden 已提交
1267
{
1268
	struct sock *sk = sock->sk;
1269
	struct net *net = sock_net(sk);
1270 1271 1272 1273 1274
	struct tipc_sock *tsk = tipc_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct list_head *clinks = &tsk->cong_links;
	bool syn = !tipc_sk_type_connectionless(sk);
J
Jon Maloy 已提交
1275
	struct tipc_group *grp = tsk->group;
1276
	struct tipc_msg *hdr = &tsk->phdr;
1277
	struct tipc_name_seq *seq;
1278
	struct sk_buff_head pkts;
1279
	u32 dport, dnode = 0;
J
Jon Maloy 已提交
1280
	u32 type, inst;
1281
	int mtu, rc;
P
Per Liden 已提交
1282

1283
	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
1284
		return -EMSGSIZE;
1285

1286 1287 1288 1289 1290 1291 1292 1293 1294 1295
	if (likely(dest)) {
		if (unlikely(m->msg_namelen < sizeof(*dest)))
			return -EINVAL;
		if (unlikely(dest->family != AF_TIPC))
			return -EINVAL;
	}

	if (grp) {
		if (!dest)
			return tipc_send_group_bcast(sock, m, dlen, timeout);
1296 1297
		if (dest->addrtype == TIPC_ADDR_NAME)
			return tipc_send_group_anycast(sock, m, dlen, timeout);
1298 1299
		if (dest->addrtype == TIPC_ADDR_ID)
			return tipc_send_group_unicast(sock, m, dlen, timeout);
1300 1301
		if (dest->addrtype == TIPC_ADDR_MCAST)
			return tipc_send_group_mcast(sock, m, dlen, timeout);
1302 1303
		return -EINVAL;
	}
J
Jon Maloy 已提交
1304

1305
	if (unlikely(!dest)) {
1306 1307
		dest = &tsk->peer;
		if (!syn || dest->family != AF_TIPC)
1308 1309
			return -EDESTADDRREQ;
	}
1310 1311

	if (unlikely(syn)) {
1312
		if (sk->sk_state == TIPC_LISTEN)
1313
			return -EPIPE;
1314
		if (sk->sk_state != TIPC_OPEN)
1315 1316 1317
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
1318
		if (dest->addrtype == TIPC_ADDR_NAME) {
1319 1320
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
1321
		}
P
Per Liden 已提交
1322
	}
1323

1324 1325 1326
	seq = &dest->addr.nameseq;
	if (dest->addrtype == TIPC_ADDR_MCAST)
		return tipc_sendmcast(sock, seq, m, dlen, timeout);
1327

1328 1329 1330
	if (dest->addrtype == TIPC_ADDR_NAME) {
		type = dest->addr.name.name.type;
		inst = dest->addr.name.name.instance;
J
Jon Maloy 已提交
1331
		dnode = dest->addr.name.domain;
1332 1333 1334 1335
		msg_set_type(hdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
		msg_set_nametype(hdr, type);
		msg_set_nameinst(hdr, inst);
J
Jon Maloy 已提交
1336
		msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
1337
		dport = tipc_nametbl_translate(net, type, inst, &dnode);
1338 1339
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dport);
1340 1341
		if (unlikely(!dport && !dnode))
			return -EHOSTUNREACH;
1342 1343
	} else if (dest->addrtype == TIPC_ADDR_ID) {
		dnode = dest->addr.id.node;
1344 1345 1346 1347 1348
		msg_set_type(hdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(hdr, 0);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dest->addr.id.ref);
		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
1349 1350
	} else {
		return -EINVAL;
1351 1352
	}

1353
	/* Block or return if destination link is congested */
J
Jon Maloy 已提交
1354 1355
	rc = tipc_wait_for_cond(sock, &timeout,
				!tipc_dest_find(clinks, dnode, 0));
1356 1357 1358 1359
	if (unlikely(rc))
		return rc;

	skb_queue_head_init(&pkts);
1360
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
1361 1362
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
1363
		return rc;
1364

1365 1366
	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
J
Jon Maloy 已提交
1367
		tipc_dest_push(clinks, dnode, 0);
1368 1369 1370
		tsk->cong_link_cnt++;
		rc = 0;
	}
1371

1372 1373 1374 1375
	if (unlikely(syn && !rc))
		tipc_set_sk_state(sk, TIPC_CONNECTING);

	return rc ? rc : dlen;
P
Per Liden 已提交
1376 1377
}

1378
/**
1379
 * tipc_sendstream - send stream-oriented data
P
Per Liden 已提交
1380
 * @sock: socket structure
1381 1382
 * @m: data to send
 * @dsz: total length of data to be transmitted
1383
 *
1384
 * Used for SOCK_STREAM data.
1385
 *
1386 1387
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
P
Per Liden 已提交
1388
 */
1389
static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
1390 1391 1392 1393 1394
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
1395
	ret = __tipc_sendstream(sock, m, dsz);
1396 1397 1398 1399 1400
	release_sock(sk);

	return ret;
}

1401
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
P
Per Liden 已提交
1402
{
1403
	struct sock *sk = sock->sk;
1404
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
1405 1406 1407 1408 1409 1410 1411 1412
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	struct sk_buff_head pkts;
	u32 dnode = tsk_peer_node(tsk);
	int send, sent = 0;
	int rc = 0;
1413

1414
	skb_queue_head_init(&pkts);
1415

1416 1417
	if (unlikely(dlen > INT_MAX))
		return -EMSGSIZE;
1418

1419 1420 1421 1422 1423
	/* Handle implicit connection setup */
	if (unlikely(dest)) {
		rc = __tipc_sendmsg(sock, m, dlen);
		if (dlen && (dlen == rc))
			tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
1424
		return rc;
1425
	}
1426

1427
	do {
1428 1429
		rc = tipc_wait_for_cond(sock, &timeout,
					(!tsk->cong_link_cnt &&
1430 1431
					 !tsk_conn_cong(tsk) &&
					 tipc_sk_connected(sk)));
1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449
		if (unlikely(rc))
			break;

		send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
		rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
		if (unlikely(rc != send))
			break;

		rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
		if (unlikely(rc == -ELINKCONG)) {
			tsk->cong_link_cnt = 1;
			rc = 0;
		}
		if (likely(!rc)) {
			tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
			sent += send;
		}
	} while (sent < dlen && !rc);
1450

1451
	return sent ? sent : rc;
P
Per Liden 已提交
1452 1453
}

1454
/**
1455
 * tipc_send_packet - send a connection-oriented message
P
Per Liden 已提交
1456
 * @sock: socket structure
1457 1458
 * @m: message to send
 * @dsz: length of data to be transmitted
1459
 *
1460
 * Used for SOCK_SEQPACKET messages.
1461
 *
1462
 * Returns the number of bytes sent on success, or errno otherwise
P
Per Liden 已提交
1463
 */
1464
static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
P
Per Liden 已提交
1465
{
1466 1467
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;
P
Per Liden 已提交
1468

1469
	return tipc_sendstream(sock, m, dsz);
P
Per Liden 已提交
1470 1471
}

1472
/* tipc_sk_finish_conn - complete the setup of a connection
P
Per Liden 已提交
1473
 */
1474
static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1475
				u32 peer_node)
P
Per Liden 已提交
1476
{
1477 1478
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
1479
	struct tipc_msg *msg = &tsk->phdr;
P
Per Liden 已提交
1480

1481 1482 1483 1484 1485
	msg_set_destnode(msg, peer_node);
	msg_set_destport(msg, peer_port);
	msg_set_type(msg, TIPC_CONN_MSG);
	msg_set_lookup_scope(msg, 0);
	msg_set_hdr_sz(msg, SHORT_H_SIZE);
1486

1487
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
1488
	tipc_set_sk_state(sk, TIPC_ESTABLISHED);
1489 1490
	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
1491
	tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
1492 1493 1494 1495 1496 1497
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
		return;

	/* Fall back to message based flow control */
	tsk->rcv_win = FLOWCTL_MSG_WIN;
	tsk->snd_win = FLOWCTL_MSG_WIN;
P
Per Liden 已提交
1498 1499 1500
}

/**
1501
 * tipc_sk_set_orig_addr - capture sender's address for received message
P
Per Liden 已提交
1502
 * @m: descriptor for message info
1503
 * @hdr: received message header
1504
 *
P
Per Liden 已提交
1505 1506
 * Note: Address is not captured if not requested by receiver.
 */
1507
static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
P
Per Liden 已提交
1508
{
1509 1510 1511 1512 1513 1514 1515 1516
	DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name);
	struct tipc_msg *hdr = buf_msg(skb);

	if (!srcaddr)
		return;

	srcaddr->sock.family = AF_TIPC;
	srcaddr->sock.addrtype = TIPC_ADDR_ID;
1517
	srcaddr->sock.scope = 0;
1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528
	srcaddr->sock.addr.id.ref = msg_origport(hdr);
	srcaddr->sock.addr.id.node = msg_orignode(hdr);
	srcaddr->sock.addr.name.domain = 0;
	m->msg_namelen = sizeof(struct sockaddr_tipc);

	if (!msg_in_group(hdr))
		return;

	/* Group message users may also want to know sending member's id */
	srcaddr->member.family = AF_TIPC;
	srcaddr->member.addrtype = TIPC_ADDR_NAME;
1529
	srcaddr->member.scope = 0;
1530 1531 1532 1533
	srcaddr->member.addr.name.name.type = msg_nametype(hdr);
	srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
	srcaddr->member.addr.name.domain = 0;
	m->msg_namelen = sizeof(*srcaddr);
P
Per Liden 已提交
1534 1535 1536
}

/**
1537
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
P
Per Liden 已提交
1538 1539
 * @m: descriptor for message info
 * @msg: received message header
1540
 * @tsk: TIPC port associated with message
1541
 *
P
Per Liden 已提交
1542
 * Note: Ancillary data is not captured if not requested by receiver.
1543
 *
P
Per Liden 已提交
1544 1545
 * Returns 0 if successful, otherwise errno
 */
1546 1547
static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
				 struct tipc_sock *tsk)
P
Per Liden 已提交
1548 1549 1550 1551
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
1552
	int has_name;
P
Per Liden 已提交
1553 1554 1555 1556 1557 1558 1559 1560 1561 1562
	int res;

	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
1563 1564
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
P
Per Liden 已提交
1565
			return res;
1566 1567 1568 1569 1570 1571
		if (anc_data[1]) {
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
P
Per Liden 已提交
1572 1573 1574 1575 1576 1577
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
1578
		has_name = 1;
P
Per Liden 已提交
1579 1580 1581 1582 1583
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
1584
		has_name = 1;
P
Per Liden 已提交
1585 1586 1587 1588 1589
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
1590 1591 1592 1593
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
P
Per Liden 已提交
1594 1595
		break;
	default:
1596
		has_name = 0;
P
Per Liden 已提交
1597
	}
1598 1599 1600 1601 1602
	if (has_name) {
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}
P
Per Liden 已提交
1603 1604 1605 1606

	return 0;
}

1607
static void tipc_sk_send_ack(struct tipc_sock *tsk)
1608
{
1609 1610
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
1611
	struct sk_buff *skb = NULL;
1612
	struct tipc_msg *msg;
1613 1614
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);
1615

1616
	if (!tipc_sk_connected(sk))
1617
		return;
1618 1619 1620
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
1621
	if (!skb)
1622
		return;
1623
	msg = buf_msg(skb);
1624 1625 1626 1627 1628 1629 1630 1631
	msg_set_conn_ack(msg, tsk->rcv_unacked);
	tsk->rcv_unacked = 0;

	/* Adjust to and advertize the correct window limit */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
		tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
		msg_set_adv_win(msg, tsk->rcv_win);
	}
1632
	tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
1633 1634
}

1635
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
Y
Ying Xue 已提交
1636 1637 1638
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
1639
	long timeo = *timeop;
1640 1641 1642 1643
	int err = sock_error(sk);

	if (err)
		return err;
Y
Ying Xue 已提交
1644 1645 1646

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1647
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
1648
			if (sk->sk_shutdown & RCV_SHUTDOWN) {
Y
Ying Xue 已提交
1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661
				err = -ENOTCONN;
				break;
			}
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
1662 1663 1664
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
1665 1666 1667 1668

		err = sock_error(sk);
		if (err)
			break;
Y
Ying Xue 已提交
1669 1670
	}
	finish_wait(sk_sleep(sk), &wait);
1671
	*timeop = timeo;
Y
Ying Xue 已提交
1672 1673 1674
	return err;
}

1675
/**
1676
 * tipc_recvmsg - receive packet-oriented message
P
Per Liden 已提交
1677
 * @m: descriptor for message info
1678
 * @buflen: length of user buffer area
P
Per Liden 已提交
1679
 * @flags: receive flags
1680
 *
P
Per Liden 已提交
1681 1682 1683 1684 1685
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
1686 1687
static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
			size_t buflen,	int flags)
P
Per Liden 已提交
1688
{
1689
	struct sock *sk = sock->sk;
1690
	bool connected = !tipc_sk_type_connectionless(sk);
1691
	struct tipc_sock *tsk = tipc_sk(sk);
1692
	int rc, err, hlen, dlen, copy;
1693
	struct sk_buff_head xmitq;
1694 1695 1696
	struct tipc_msg *hdr;
	struct sk_buff *skb;
	bool grp_evt;
1697
	long timeout;
P
Per Liden 已提交
1698

1699
	/* Catch invalid receive requests */
1700
	if (unlikely(!buflen))
P
Per Liden 已提交
1701 1702
		return -EINVAL;

1703
	lock_sock(sk);
1704 1705
	if (unlikely(connected && sk->sk_state == TIPC_OPEN)) {
		rc = -ENOTCONN;
P
Per Liden 已提交
1706 1707
		goto exit;
	}
1708
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
P
Per Liden 已提交
1709

1710
	/* Step rcv queue to first msg with data or error; wait if necessary */
1711 1712 1713 1714 1715 1716 1717 1718 1719
	do {
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			goto exit;
		skb = skb_peek(&sk->sk_receive_queue);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);
1720
		grp_evt = msg_is_grp_evt(hdr);
1721 1722
		if (likely(dlen || err))
			break;
1723
		tsk_advance_rx_queue(sk);
1724
	} while (1);
P
Per Liden 已提交
1725

1726
	/* Collect msg meta data, including error code and rejected data */
1727
	tipc_sk_set_orig_addr(m, skb);
1728 1729
	rc = tipc_sk_anc_data_recv(m, hdr, tsk);
	if (unlikely(rc))
P
Per Liden 已提交
1730 1731
		goto exit;

1732 1733 1734 1735
	/* Capture data if non-error msg, otherwise just set return value */
	if (likely(!err)) {
		copy = min_t(int, dlen, buflen);
		if (unlikely(copy != dlen))
P
Per Liden 已提交
1736
			m->msg_flags |= MSG_TRUNC;
1737
		rc = skb_copy_datagram_msg(skb, hlen, m, copy);
P
Per Liden 已提交
1738
	} else {
1739 1740 1741 1742
		copy = 0;
		rc = 0;
		if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
			rc = -ECONNRESET;
P
Per Liden 已提交
1743
	}
1744 1745
	if (unlikely(rc))
		goto exit;
P
Per Liden 已提交
1746

1747 1748 1749 1750 1751 1752 1753 1754
	/* Mark message as group event if applicable */
	if (unlikely(grp_evt)) {
		if (msg_grp_evt(hdr) == TIPC_WITHDRAWN)
			m->msg_flags |= MSG_EOR;
		m->msg_flags |= MSG_OOB;
		copy = 0;
	}

1755
	/* Caption of data or error code/rejected data was successful */
1756 1757 1758
	if (unlikely(flags & MSG_PEEK))
		goto exit;

1759 1760 1761 1762 1763 1764 1765 1766 1767
	/* Send group flow control advertisement when applicable */
	if (tsk->group && msg_in_group(hdr) && !grp_evt) {
		skb_queue_head_init(&xmitq);
		tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
					  msg_orignode(hdr), msg_origport(hdr),
					  &xmitq);
		tipc_node_distr_xmit(sock_net(sk), &xmitq);
	}

1768
	tsk_advance_rx_queue(sk);
1769

1770 1771 1772
	if (likely(!connected))
		goto exit;

1773
	/* Send connection flow control advertisement when applicable */
1774 1775 1776
	tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
	if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
		tipc_sk_send_ack(tsk);
P
Per Liden 已提交
1777
exit:
1778
	release_sock(sk);
1779
	return rc ? rc : copy;
P
Per Liden 已提交
1780 1781
}

1782
/**
1783
 * tipc_recvstream - receive stream-oriented data
P
Per Liden 已提交
1784
 * @m: descriptor for message info
1785
 * @buflen: total size of user buffer area
P
Per Liden 已提交
1786
 * @flags: receive flags
1787 1788
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
P
Per Liden 已提交
1789 1790 1791 1792
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
1793 1794
static int tipc_recvstream(struct socket *sock, struct msghdr *m,
			   size_t buflen, int flags)
P
Per Liden 已提交
1795
{
1796
	struct sock *sk = sock->sk;
1797
	struct tipc_sock *tsk = tipc_sk(sk);
1798 1799 1800 1801 1802 1803 1804
	struct sk_buff *skb;
	struct tipc_msg *hdr;
	struct tipc_skb_cb *skb_cb;
	bool peek = flags & MSG_PEEK;
	int offset, required, copy, copied = 0;
	int hlen, dlen, err, rc;
	long timeout;
P
Per Liden 已提交
1805

1806
	/* Catch invalid receive attempts */
1807
	if (unlikely(!buflen))
P
Per Liden 已提交
1808 1809
		return -EINVAL;

1810
	lock_sock(sk);
P
Per Liden 已提交
1811

1812
	if (unlikely(sk->sk_state == TIPC_OPEN)) {
1813
		rc = -ENOTCONN;
Y
Ying Xue 已提交
1814
		goto exit;
P
Per Liden 已提交
1815
	}
1816 1817
	required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen);
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
P
Per Liden 已提交
1818

1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829
	do {
		/* Look at first msg in receive queue; wait if necessary */
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			break;
		skb = skb_peek(&sk->sk_receive_queue);
		skb_cb = TIPC_SKB_CB(skb);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);
1830

1831 1832 1833 1834 1835
		/* Discard any empty non-errored (SYN-) message */
		if (unlikely(!dlen && !err)) {
			tsk_advance_rx_queue(sk);
			continue;
		}
1836

1837 1838
		/* Collect msg meta data, incl. error code and rejected data */
		if (!copied) {
1839
			tipc_sk_set_orig_addr(m, skb);
1840 1841 1842 1843
			rc = tipc_sk_anc_data_recv(m, hdr, tsk);
			if (rc)
				break;
		}
P
Per Liden 已提交
1844

1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864
		/* Copy data if msg ok, otherwise return error/partial data */
		if (likely(!err)) {
			offset = skb_cb->bytes_read;
			copy = min_t(int, dlen - offset, buflen - copied);
			rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
			if (unlikely(rc))
				break;
			copied += copy;
			offset += copy;
			if (unlikely(offset < dlen)) {
				if (!peek)
					skb_cb->bytes_read = offset;
				break;
			}
		} else {
			rc = 0;
			if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control)
				rc = -ECONNRESET;
			if (copied || rc)
				break;
P
Per Liden 已提交
1865 1866
		}

1867 1868
		if (unlikely(peek))
			break;
P
Per Liden 已提交
1869

1870
		tsk_advance_rx_queue(sk);
1871

1872 1873 1874 1875
		/* Send connection flow control advertisement when applicable */
		tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
		if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE))
			tipc_sk_send_ack(tsk);
P
Per Liden 已提交
1876

1877 1878 1879
		/* Exit if all requested data or FIN/error received */
		if (copied == buflen || err)
			break;
P
Per Liden 已提交
1880

1881
	} while (!skb_queue_empty(&sk->sk_receive_queue) || copied < required);
P
Per Liden 已提交
1882
exit:
1883
	release_sock(sk);
1884
	return copied ? copied : rc;
P
Per Liden 已提交
1885 1886
}

1887 1888 1889 1890 1891 1892 1893 1894 1895 1896
/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 *
 * Wakes any thread sleeping on the socket's wait queue, signalling
 * that the socket has become writable again.
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	/* Only issue a wakeup if somebody is actually waiting */
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
						EPOLLWRNORM | EPOLLWRBAND);
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 *
 * Wakes any thread sleeping on the socket's wait queue, signalling
 * that data is available for reading.
 */
static void tipc_data_ready(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	/* Only issue a wakeup if somebody is actually waiting */
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
						EPOLLRDNORM | EPOLLRDBAND);
	rcu_read_unlock();
}

1920 1921 1922 1923 1924
/* tipc_sock_destruct - socket destructor; discard any messages still
 * sitting in the receive queue when the last reference is dropped
 */
static void tipc_sock_destruct(struct sock *sk)
{
	__skb_queue_purge(&sk->sk_receive_queue);
}

J
Jon Maloy 已提交
1925 1926 1927 1928 1929 1930 1931
/* tipc_sk_proto_rcv - handle an incoming non-data (protocol) message
 * @sk: receiving socket
 * @inputq: queue holding the message; group/member handlers may append
 *          further messages to it for subsequent processing
 * @xmitq: queue for any outgoing protocol responses
 *
 * Consumes the first buffer of @inputq. Wakes up sleeping senders if
 * the handled event released send congestion.
 */
static void tipc_sk_proto_rcv(struct sock *sk,
			      struct sk_buff_head *inputq,
			      struct sk_buff_head *xmitq)
{
	struct sk_buff *skb = __skb_dequeue(inputq);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);
	struct tipc_group *grp = tsk->group;
	bool wakeup = false;

	switch (msg_user(hdr)) {
	case CONN_MANAGER:
		/* skb ownership is passed on; no kfree_skb() here */
		tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
		return;
	case SOCK_WAKEUP:
		/* A previously congested link became available again */
		tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
		tsk->cong_link_cnt--;
		wakeup = true;
		break;
	case GROUP_PROTOCOL:
		tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
		break;
	case TOP_SRV:
		/* Group membership event from the topology service */
		tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
				      hdr, inputq, xmitq);
		break;
	default:
		break;
	}

	/* Wake up sleeping senders if congestion was released */
	if (wakeup)
		sk->sk_write_space(sk);

	kfree_skb(skb);
}

1961
/**
 * tipc_filter_connect - Handle incoming message for a connection-based socket
 * @tsk: TIPC socket
 * @skb: pointer to message buffer. Set to NULL if buffer is consumed
 *
 * Dispatches on the socket's connection state: a SYN is only accepted in
 * TIPC_LISTEN, an ACK/NACK only in TIPC_CONNECTING, and only messages from
 * the connected peer in TIPC_ESTABLISHED.
 *
 * Returns true if everything ok, false otherwise
 */
static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *hdr = buf_msg(skb);
	u32 pport = msg_origport(hdr);
	u32 pnode = msg_orignode(hdr);

	/* Multicast never belongs on a connection-based socket */
	if (unlikely(msg_mcast(hdr)))
		return false;

	switch (sk->sk_state) {
	case TIPC_CONNECTING:
		/* Accept only ACK or NACK message */
		if (unlikely(!msg_connected(hdr))) {
			/* Ignore messages not coming from the attempted peer */
			if (pport != tsk_peer_port(tsk) ||
			    pnode != tsk_peer_node(tsk))
				return false;

			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = ECONNREFUSED;
			sk->sk_state_change(sk);
			return true;
		}

		/* Peer rejected the connection attempt */
		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = ECONNREFUSED;
			sk->sk_state_change(sk);
			return true;
		}

		/* A connected non-data message here is a protocol violation */
		if (unlikely(!msg_isdata(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = EINVAL;
			sk->sk_state_change(sk);
			return true;
		}

		tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
		msg_set_importance(&tsk->phdr, msg_importance(hdr));

		/* If 'ACK+' message, add to socket receive queue */
		if (msg_data_sz(hdr))
			return true;

		/* If empty 'ACK-' message, wake up sleeping connect() */
		sk->sk_data_ready(sk);

		/* 'ACK-' message is neither accepted nor rejected: */
		msg_set_dest_droppable(hdr, 1);
		return false;

	case TIPC_OPEN:
	case TIPC_DISCONNECTING:
		break;
	case TIPC_LISTEN:
		/* Accept only SYN message */
		if (!msg_connected(hdr) && !(msg_errcode(hdr)))
			return true;
		break;
	case TIPC_ESTABLISHED:
		/* Accept only connection-based messages sent by peer */
		if (unlikely(!tsk_peer_msg(tsk, hdr)))
			return false;

		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			/* Let timer expire on it's own */
			tipc_node_remove_conn(net, tsk_peer_node(tsk),
					      tsk->portid);
			sk->sk_state_change(sk);
		}
		return true;
	default:
		pr_err("Unknown sk_state %u\n", sk->sk_state);
	}

	return false;
}

2049 2050 2051
/**
 * rcvbuf_limit - get proper overload limit of socket receive queue
 * @sk: socket
2052
 * @skb: message
2053
 *
2054 2055
 * For connection oriented messages, irrespective of importance,
 * default queue limit is 2 MB.
2056
 *
2057 2058
 * For connectionless messages, queue limits are based on message
 * importance as follows:
2059
 *
2060 2061 2062 2063
 * TIPC_LOW_IMPORTANCE       (2 MB)
 * TIPC_MEDIUM_IMPORTANCE    (4 MB)
 * TIPC_HIGH_IMPORTANCE      (8 MB)
 * TIPC_CRITICAL_IMPORTANCE  (16 MB)
2064 2065 2066
 *
 * Returns overload limit according to corresponding message importance
 */
2067
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
2068
{
2069 2070 2071
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);

2072 2073 2074
	if (unlikely(msg_in_group(hdr)))
		return sk->sk_rcvbuf;

2075 2076
	if (unlikely(!msg_connected(hdr)))
		return sk->sk_rcvbuf << msg_importance(hdr);
2077

2078 2079
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return sk->sk_rcvbuf;
2080

2081
	return FLOWCTL_MSG_LIM;
2082 2083
}

2084
/**
 * tipc_sk_filter_rcv - validate incoming message
 * @sk: socket
 * @skb: pointer to message.
 * @xmitq: queue for outgoing responses and rejected messages
 *
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
 * Called with socket lock already taken
 *
 */
static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
			       struct sk_buff_head *xmitq)
{
	bool sk_conn = !tipc_sk_type_connectionless(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = buf_msg(skb);
	struct net *net = sock_net(sk);
	struct sk_buff_head inputq;
	int limit, err = TIPC_OK;

	TIPC_SKB_CB(skb)->bytes_read = 0;
	__skb_queue_head_init(&inputq);
	__skb_queue_tail(&inputq, skb);

	/* Protocol messages are diverted; they may leave data on inputq */
	if (unlikely(!msg_isdata(hdr)))
		tipc_sk_proto_rcv(sk, &inputq, xmitq);

	if (unlikely(grp))
		tipc_group_filter_msg(grp, &inputq, xmitq);

	/* Validate and add to receive buffer if there is space */
	while ((skb = __skb_dequeue(&inputq))) {
		hdr = buf_msg(skb);
		limit = rcvbuf_limit(sk, skb);
		if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
		    (!sk_conn && msg_connected(hdr)) ||
		    (!grp && msg_in_group(hdr)))
			err = TIPC_ERR_NO_PORT;
		else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
			atomic_inc(&sk->sk_drops);
			err = TIPC_ERR_OVERLOAD;
		}

		/* Unacceptable message: bounce it back to the sender */
		if (unlikely(err)) {
			tipc_skb_reject(net, err, skb, xmitq);
			err = TIPC_OK;
			continue;
		}
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		skb_set_owner_r(skb, sk);
		sk->sk_data_ready(sk);
	}
}
P
Per Liden 已提交
2139

2140
/**
J
Jon Maloy 已提交
2141
 * tipc_sk_backlog_rcv - handle incoming message from backlog queue
2142
 * @sk: socket
2143
 * @skb: message
2144
 *
2145
 * Caller must hold socket lock
2146
 */
J
Jon Maloy 已提交
2147
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
2148
{
J
Jon Maloy 已提交
2149
	unsigned int before = sk_rmem_alloc_get(sk);
J
Jon Paul Maloy 已提交
2150
	struct sk_buff_head xmitq;
J
Jon Maloy 已提交
2151
	unsigned int added;
2152

J
Jon Paul Maloy 已提交
2153 2154
	__skb_queue_head_init(&xmitq);

J
Jon Maloy 已提交
2155 2156 2157
	tipc_sk_filter_rcv(sk, skb, &xmitq);
	added = sk_rmem_alloc_get(sk) - before;
	atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt);
J
Jon Paul Maloy 已提交
2158

J
Jon Maloy 已提交
2159
	/* Send pending response/rejected messages, if any */
2160
	tipc_node_distr_xmit(sock_net(sk), &xmitq);
2161 2162 2163
	return 0;
}

2164
/**
 * tipc_sk_enqueue - extract all buffers with destination 'dport' from
 *                   inputq and try adding them to socket or backlog queue
 * @inputq: list of incoming buffers with potentially different destinations
 * @sk: socket where the buffers should be enqueued
 * @dport: port number for the socket
 * @xmitq: queue for rejected/bounced messages
 *
 * Caller must hold socket lock
 */
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
			    u32 dport, struct sk_buff_head *xmitq)
{
	/* Bound the time spent here so softirq work is not starved */
	unsigned long time_limit = jiffies + 2;
	struct sk_buff *skb;
	unsigned int lim;
	atomic_t *dcnt;
	u32 onode;

	while (skb_queue_len(inputq)) {
		if (unlikely(time_after_eq(jiffies, time_limit)))
			return;

		skb = tipc_skb_dequeue(inputq, dport);
		if (unlikely(!skb))
			return;

		/* Add message directly to receive queue if possible */
		if (!sock_owned_by_user(sk)) {
			tipc_sk_filter_rcv(sk, skb, xmitq);
			continue;
		}

		/* Try backlog, compensating for double-counted bytes */
		dcnt = &tipc_sk(sk)->dupl_rcvcnt;
		if (!sk->sk_backlog.len)
			atomic_set(dcnt, 0);
		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
		if (likely(!sk_add_backlog(sk, skb, lim)))
			continue;

		/* Overload => reject message back to sender */
		onode = tipc_own_addr(sock_net(sk));
		atomic_inc(&sk->sk_drops);
		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
			__skb_queue_tail(xmitq, skb);
		break;
	}
}

2213
/**
 * tipc_sk_rcv - handle a chain of incoming buffers
 * @net: network namespace the buffers belong to
 * @inputq: buffer list containing the buffers
 * Consumes all buffers in list until inputq is empty
 * Note: may be called in multiple threads referring to the same queue
 */
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
{
	struct sk_buff_head xmitq;
	u32 dnode, dport = 0;
	int err;
	struct tipc_sock *tsk;
	struct sock *sk;
	struct sk_buff *skb;

	__skb_queue_head_init(&xmitq);
	while (skb_queue_len(inputq)) {
		dport = tipc_skb_peek_port(inputq, dport);
		tsk = tipc_sk_lookup(net, dport);

		if (likely(tsk)) {
			sk = &tsk->sk;
			/* If the lock is contended the owner will drain the
			 * backlog; skip rather than spin in softirq context.
			 */
			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
				spin_unlock_bh(&sk->sk_lock.slock);
			}
			/* Send pending response/rejected messages, if any */
			tipc_node_distr_xmit(sock_net(sk), &xmitq);
			sock_put(sk);
			continue;
		}
		/* No destination socket => dequeue skb if still there */
		skb = tipc_skb_dequeue(inputq, dport);
		if (!skb)
			return;

		/* Try secondary lookup if unresolved named message */
		err = TIPC_ERR_NO_PORT;
		if (tipc_msg_lookup_dest(net, skb, &err))
			goto xmit;

		/* Prepare for message rejection */
		if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
			continue;
xmit:
		dnode = msg_destnode(buf_msg(skb));
		tipc_node_xmit_skb(net, skb, dnode, dport);
	}
}

Y
Ying Xue 已提交
2263 2264
/* tipc_wait_for_connect - sleep until the socket leaves TIPC_CONNECTING
 * @sock: socket performing the connect
 * @timeo_p: in/out remaining timeout in jiffies
 *
 * Returns 0 once connecting is over, or a negative errno on socket error,
 * timeout, or pending signal.
 */
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = sock->sk;
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (!*timeo_p)
			return -ETIMEDOUT;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		add_wait_queue(sk_sleep(sk), &wait);
		done = sk_wait_event(sk, timeo_p,
				     sk->sk_state != TIPC_CONNECTING, &wait);
		remove_wait_queue(sk_sleep(sk), &wait);
	} while (!done);
	return 0;
}

P
Per Liden 已提交
2286
/**
 * tipc_connect - establish a connection to another TIPC port
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
 * @flags: file-related flags associated with socket
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
			int destlen, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
	/* Non-blocking connect gets a zero timeout */
	long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
	int previous;
	int res = 0;

	if (destlen != sizeof(struct sockaddr_tipc))
		return -EINVAL;

	lock_sock(sk);

	/* Group member sockets cannot connect */
	if (tsk->group) {
		res = -EINVAL;
		goto exit;
	}

	/* AF_UNSPEC "disconnects" a connectionless socket */
	if (dst->family == AF_UNSPEC) {
		memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
		if (!tipc_sk_type_connectionless(sk))
			res = -EINVAL;
		goto exit;
	} else if (dst->family != AF_TIPC) {
		res = -EINVAL;
	}
	if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
		res = -EINVAL;
	if (res)
		goto exit;

	/* DGRAM/RDM connect(), just save the destaddr */
	if (tipc_sk_type_connectionless(sk)) {
		memcpy(&tsk->peer, dest, destlen);
		goto exit;
	}

	previous = sk->sk_state;

	switch (sk->sk_state) {
	case TIPC_OPEN:
		/* Send a 'SYN-' to destination */
		m.msg_name = dest;
		m.msg_namelen = destlen;

		/* If connect is in non-blocking case, set MSG_DONTWAIT to
		 * indicate send_msg() is never blocked.
		 */
		if (!timeout)
			m.msg_flags = MSG_DONTWAIT;

		res = __tipc_sendmsg(sock, &m, 0);
		if ((res < 0) && (res != -EWOULDBLOCK))
			goto exit;

		/* Just entered TIPC_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		res = -EINPROGRESS;
		/* fall thru' */
	case TIPC_CONNECTING:
		if (!timeout) {
			if (previous == TIPC_CONNECTING)
				res = -EALREADY;
			goto exit;
		}
		timeout = msecs_to_jiffies(timeout);
		/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
		res = tipc_wait_for_connect(sock, &timeout);
		break;
	case TIPC_ESTABLISHED:
		res = -EISCONN;
		break;
	default:
		res = -EINVAL;
	}

exit:
	release_sock(sk);
	return res;
}

2381
/**
2382
 * tipc_listen - allow socket to listen for incoming connections
P
Per Liden 已提交
2383 2384
 * @sock: socket structure
 * @len: (unused)
2385
 *
P
Per Liden 已提交
2386 2387
 * Returns 0 on success, errno otherwise
 */
2388
static int tipc_listen(struct socket *sock, int len)
P
Per Liden 已提交
2389
{
2390 2391 2392 2393
	struct sock *sk = sock->sk;
	int res;

	lock_sock(sk);
2394
	res = tipc_set_sk_state(sk, TIPC_LISTEN);
2395
	release_sock(sk);
2396

2397
	return res;
P
Per Liden 已提交
2398 2399
}

Y
Ying Xue 已提交
2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413
/* tipc_wait_for_accept - sleep until a SYN arrives on the listening socket
 * @sock: listening socket
 * @timeo: maximum time to wait, in jiffies
 *
 * Returns 0 when a connection request is queued, -EAGAIN on timeout, or
 * a signal-derived errno.
 */
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int err;

	/* True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	*/
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			/* Drop the socket lock while sleeping */
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

2433
/**
 * tipc_accept - wait for connection request
 * @sock: listening socket
 * @new_sock: new socket that is to be connected
 * @flags: file-related flags associated with socket
 * @kern: caller is in-kernel (passed through to socket creation)
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
		       bool kern)
{
	struct sock *new_sk, *sk = sock->sk;
	struct sk_buff *buf;
	struct tipc_sock *new_tsock;
	struct tipc_msg *msg;
	long timeo;
	int res;

	lock_sock(sk);

	if (sk->sk_state != TIPC_LISTEN) {
		res = -EINVAL;
		goto exit;
	}
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	res = tipc_wait_for_accept(sock, timeo);
	if (res)
		goto exit;

	/* Peek at the queued SYN; it is dequeued further below */
	buf = skb_peek(&sk->sk_receive_queue);

	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
	if (res)
		goto exit;
	security_sk_clone(sock->sk, new_sock->sk);

	new_sk = new_sock->sk;
	new_tsock = tipc_sk(new_sk);
	msg = buf_msg(buf);

	/* we lock on new_sk; but lockdep sees the lock on sk */
	lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);

	/*
	 * Reject any stray messages received by new socket
	 * before the socket lock was taken (very, very unlikely)
	 */
	tsk_rej_rx_queue(new_sk);

	/* Connect new socket to it's peer */
	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));

	/* Inherit connection attributes from the SYN */
	tsk_set_importance(new_tsock, msg_importance(msg));
	if (msg_named(msg)) {
		new_tsock->conn_type = msg_nametype(msg);
		new_tsock->conn_instance = msg_nameinst(msg);
	}

	/*
	 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
	 * Respond to 'SYN+' by queuing it on new socket.
	 */
	if (!msg_data_sz(msg)) {
		struct msghdr m = {NULL,};

		tsk_advance_rx_queue(sk);
		__tipc_sendstream(new_sock, &m, 0);
	} else {
		__skb_dequeue(&sk->sk_receive_queue);
		__skb_queue_head(&new_sk->sk_receive_queue, buf);
		skb_set_owner_r(buf, new_sk);
	}
	release_sock(new_sk);
exit:
	release_sock(sk);
	return res;
}

/**
 * tipc_shutdown - shutdown socket connection
 * @sock: socket structure
 * @how: direction to close (must be SHUT_RDWR)
 *
 * Terminates connection (if necessary), then purges socket's receive queue.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	int res;

	/* TIPC only supports shutting down both directions at once */
	if (how != SHUT_RDWR)
		return -EINVAL;

	lock_sock(sk);

	__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
	sk->sk_shutdown = SEND_SHUTDOWN;

	if (sk->sk_state == TIPC_DISCONNECTING) {
		/* Discard any unreceived messages */
		__skb_queue_purge(&sk->sk_receive_queue);

		/* Wake up anyone sleeping in poll */
		sk->sk_state_change(sk);
		res = 0;
	} else {
		res = -ENOTCONN;
	}

	release_sock(sk);
	return res;
}

2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573
/* tipc_sk_check_probing_state - act on the outcome of the previous probe
 * @sk: connected socket under supervision
 * @list: queue where a newly created probe message, if any, is appended
 *
 * Aborts the connection if the previous probe went unacknowledged;
 * otherwise queues a new CONN_PROBE and re-arms the supervision timer.
 */
static void tipc_sk_check_probing_state(struct sock *sk,
					struct sk_buff_head *list)
{
	struct tipc_sock *tsk = tipc_sk(sk);
	u32 pnode = tsk_peer_node(tsk);
	u32 pport = tsk_peer_port(tsk);
	u32 self = tsk_own_node(tsk);
	u32 oport = tsk->portid;
	struct sk_buff *skb;

	if (tsk->probe_unacked) {
		/* Peer never answered the last probe: abort the connection */
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
		sk->sk_err = ECONNABORTED;
		tipc_node_remove_conn(sock_net(sk), pnode, pport);
		sk->sk_state_change(sk);
		return;
	}
	/* Prepare new probe */
	skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
			      pnode, self, pport, oport, TIPC_OK);
	if (skb)
		__skb_queue_tail(list, skb);
	tsk->probe_unacked = true;
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
}

2574
static void tipc_sk_timeout(struct timer_list *t)
2575
{
2576 2577
	struct sock *sk = from_timer(sk, t, sk_timer);
	struct tipc_sock *tsk = tipc_sk(sk);
2578 2579
	u32 pnode = tsk_peer_node(tsk);
	struct sk_buff_head list;
2580

2581
	skb_queue_head_init(&list);
J
Jon Paul Maloy 已提交
2582
	bh_lock_sock(sk);
2583 2584 2585 2586

	/* Try again later if socket is busy */
	if (sock_owned_by_user(sk)) {
		sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
2587 2588
		bh_unlock_sock(sk);
		return;
2589 2590
	}

2591 2592 2593
	if (sk->sk_state == TIPC_ESTABLISHED)
		tipc_sk_check_probing_state(sk, &list);

2594
	bh_unlock_sock(sk);
2595 2596 2597 2598

	if (!skb_queue_empty(&list))
		tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid);

2599
	sock_put(sk);
2600 2601
}

2602
/* tipc_sk_publish - bind a name sequence to this socket
 * @tsk: socket to publish for
 * @scope: publication scope; anything but TIPC_NODE_SCOPE is coerced to
 *         TIPC_CLUSTER_SCOPE
 * @seq: name sequence (type/lower/upper) to publish
 *
 * Returns 0 on success, -EINVAL if the socket is connected or the name
 * table rejects the publication, -EADDRINUSE when the per-socket
 * publication key space wrapped around.
 */
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq const *seq)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct publication *publ;
	u32 key;

	if (scope != TIPC_NODE_SCOPE)
		scope = TIPC_CLUSTER_SCOPE;

	/* Connected sockets cannot publish names */
	if (tipc_sk_connected(sk))
		return -EINVAL;
	key = tsk->portid + tsk->pub_count + 1;
	if (key == tsk->portid)
		return -EADDRINUSE;

	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
				    scope, tsk->portid, key);
	if (unlikely(!publ))
		return -EINVAL;

	list_add(&publ->binding_sock, &tsk->publications);
	tsk->pub_count++;
	tsk->published = 1;
	return 0;
}

2630
/* tipc_sk_withdraw - withdraw name publication(s) made by this socket
 * @tsk: socket whose publications are withdrawn
 * @scope: scope to match; anything but TIPC_NODE_SCOPE is coerced to
 *         TIPC_CLUSTER_SCOPE
 * @seq: specific name sequence to withdraw, or NULL to withdraw all
 *
 * Returns 0 if at least one publication was withdrawn, -EINVAL otherwise.
 */
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq const *seq)
{
	struct net *net = sock_net(&tsk->sk);
	struct publication *publ;
	struct publication *safe;
	int rc = -EINVAL;

	if (scope != TIPC_NODE_SCOPE)
		scope = TIPC_CLUSTER_SCOPE;

	list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
		if (seq) {
			/* Withdraw only the exactly matching publication */
			if (publ->scope != scope)
				continue;
			if (publ->type != seq->type)
				continue;
			if (publ->lower != seq->lower)
				continue;
			if (publ->upper != seq->upper)
				break;
			tipc_nametbl_withdraw(net, publ->type, publ->lower,
					      publ->upper, publ->key);
			rc = 0;
			break;
		}
		/* seq == NULL: withdraw every publication on the socket */
		tipc_nametbl_withdraw(net, publ->type, publ->lower,
				      publ->upper, publ->key);
		rc = 0;
	}
	if (list_empty(&tsk->publications))
		tsk->published = 0;
	return rc;
}

2665 2666 2667
/* tipc_sk_reinit: set non-zero address in all existing sockets
 *                 when we go from standalone to network mode.
 */
void tipc_sk_reinit(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct rhashtable_iter iter;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;

	rhashtable_walk_enter(&tn->sk_rht, &iter);

	do {
		rhashtable_walk_start(&iter);

		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
			/* Rewrite the cached header template of each socket */
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			msg = &tsk->phdr;
			msg_set_prevnode(msg, tipc_own_addr(net));
			msg_set_orignode(msg, tipc_own_addr(net));
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}

		rhashtable_walk_stop(&iter);
		/* Restart the walk if the table was resized underneath us */
	} while (tsk == ERR_PTR(-EAGAIN));

	rhashtable_walk_exit(&iter);
}

2694
/* tipc_sk_lookup - map a port number to its socket, taking a reference
 *
 * Returns the socket with an elevated refcount, or NULL if not found.
 * The caller must release the reference with sock_put().
 */
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_sock *found;

	rcu_read_lock();
	found = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params);
	if (found)
		sock_hold(&found->sk);
	rcu_read_unlock();

	return found;
}

2708
/* tipc_sk_insert - assign a free port number and insert the socket into
 * the per-netns socket hash table
 * @tsk: socket to insert
 *
 * Starts from a random port and linearly probes the whole port range.
 * Returns 0 on success, -1 when every port number is taken.
 */
static int tipc_sk_insert(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
	u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;

	while (remaining--) {
		portid++;
		/* Wrap around within the valid port range */
		if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT))
			portid = TIPC_MIN_PORT;
		tsk->portid = portid;
		/* The hash table holds a reference on the socket */
		sock_hold(&tsk->sk);
		if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
						   tsk_rht_params))
			return 0;
		/* Port already in use: drop the ref and try the next one */
		sock_put(&tsk->sk);
	}

	return -1;
}

2731
/* tipc_sk_remove - remove the socket from the per-netns hash table and
 * drop the reference the table was holding
 */
static void tipc_sk_remove(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);

	if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
		/* The table's ref must not have been the last one */
		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
}

2742 2743 2744 2745 2746 2747 2748
/* Parameters for the portid -> tipc_sock hash table (tn->sk_rht) */
static const struct rhashtable_params tsk_rht_params = {
	.nelem_hint = 192,
	.head_offset = offsetof(struct tipc_sock, node),
	.key_offset = offsetof(struct tipc_sock, portid),
	.key_len = sizeof(u32), /* portid */
	.max_size = 1048576,
	.min_size = 256,
	.automatic_shrinking = true,
};

2752
/* tipc_sk_rht_init - create the per-netns socket hash table */
int tipc_sk_rht_init(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
}

2759
/* tipc_sk_rht_destroy - tear down the per-netns socket hash table */
void tipc_sk_rht_destroy(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	/* Wait for socket readers to complete */
	synchronize_net();

	rhashtable_destroy(&tn->sk_rht);
}

J
Jon Maloy 已提交
2769 2770 2771 2772 2773 2774 2775 2776 2777 2778
/* tipc_sk_join - make this socket a member of a communication group
 * @tsk: socket joining the group
 * @mreq: group request (type, instance, scope) from setsockopt()
 *
 * Returns 0 on success; -EACCES for reserved types or when already in a
 * group, -EINVAL for an invalid scope, -ENOMEM or the publish error code
 * on failure.
 */
static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
{
	struct net *net = sock_net(&tsk->sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq seq;
	int rc;

	if (mreq->type < TIPC_RESERVED_TYPES)
		return -EACCES;
	if (mreq->scope > TIPC_NODE_SCOPE)
		return -EINVAL;
	if (grp)
		return -EACCES;
	grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open);
	if (!grp)
		return -ENOMEM;
	tsk->group = grp;
	/* Pre-set the header template for group traffic */
	msg_set_lookup_scope(hdr, mreq->scope);
	msg_set_nametype(hdr, mreq->type);
	msg_set_dest_droppable(hdr, true);
	seq.type = mreq->type;
	seq.lower = mreq->instance;
	seq.upper = seq.lower;
	tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
	rc = tipc_sk_publish(tsk, mreq->scope, &seq);
	if (rc) {
		/* Roll back group creation if the publish failed */
		tipc_group_delete(net, grp);
		tsk->group = NULL;
		return rc;
	}
	/* Eliminate any risk that a broadcast overtakes sent JOINs */
	tsk->mc_method.rcast = true;
	tsk->mc_method.mandatory = true;
	tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf);
	return rc;
}

/* tipc_sk_leave - leave the communication group this socket belongs to
 *
 * Returns 0 on success, -EINVAL if the socket is not a group member.
 */
static int tipc_sk_leave(struct tipc_sock *tsk)
{
	struct net *net = sock_net(&tsk->sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_name_seq seq;
	int scope;

	if (!grp)
		return -EINVAL;

	/* Capture the bound name before the group state is torn down */
	tipc_group_self(grp, &seq, &scope);
	tipc_group_delete(net, grp);
	tsk->group = NULL;
	tipc_sk_withdraw(tsk, scope, &seq);
	return 0;
}

P
Per Liden 已提交
2823
/**
2824
 * tipc_setsockopt - set socket option
P
Per Liden 已提交
2825 2826 2827 2828 2829
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: pointer to new option value
 * @ol: length of option value
2830 2831
 *
 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
P
Per Liden 已提交
2832
 * (to ease compatibility).
2833
 *
P
Per Liden 已提交
2834 2835
 * Returns 0 on success, errno otherwise
 */
2836 2837
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, unsigned int ol)
P
Per Liden 已提交
2838
{
2839
	struct sock *sk = sock->sk;
2840
	struct tipc_sock *tsk = tipc_sk(sk);
J
Jon Maloy 已提交
2841
	struct tipc_group_req mreq;
2842
	u32 value = 0;
2843
	int res = 0;
P
Per Liden 已提交
2844

2845 2846
	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return 0;
P
Per Liden 已提交
2847 2848
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
2849 2850 2851 2852 2853 2854 2855 2856

	switch (opt) {
	case TIPC_IMPORTANCE:
	case TIPC_SRC_DROPPABLE:
	case TIPC_DEST_DROPPABLE:
	case TIPC_CONN_TIMEOUT:
		if (ol < sizeof(value))
			return -EINVAL;
J
Jon Maloy 已提交
2857 2858 2859 2860 2861 2862 2863 2864
		if (get_user(value, (u32 __user *)ov))
			return -EFAULT;
		break;
	case TIPC_GROUP_JOIN:
		if (ol < sizeof(mreq))
			return -EINVAL;
		if (copy_from_user(&mreq, ov, sizeof(mreq)))
			return -EFAULT;
2865 2866 2867 2868 2869
		break;
	default:
		if (ov || ol)
			return -EINVAL;
	}
P
Per Liden 已提交
2870

2871
	lock_sock(sk);
2872

P
Per Liden 已提交
2873 2874
	switch (opt) {
	case TIPC_IMPORTANCE:
2875
		res = tsk_set_importance(tsk, value);
P
Per Liden 已提交
2876 2877 2878
		break;
	case TIPC_SRC_DROPPABLE:
		if (sock->type != SOCK_STREAM)
2879
			tsk_set_unreliable(tsk, value);
2880
		else
P
Per Liden 已提交
2881 2882 2883
			res = -ENOPROTOOPT;
		break;
	case TIPC_DEST_DROPPABLE:
2884
		tsk_set_unreturnable(tsk, value);
P
Per Liden 已提交
2885 2886
		break;
	case TIPC_CONN_TIMEOUT:
2887
		tipc_sk(sk)->conn_timeout = value;
P
Per Liden 已提交
2888
		break;
2889 2890 2891 2892 2893 2894 2895 2896
	case TIPC_MCAST_BROADCAST:
		tsk->mc_method.rcast = false;
		tsk->mc_method.mandatory = true;
		break;
	case TIPC_MCAST_REPLICAST:
		tsk->mc_method.rcast = true;
		tsk->mc_method.mandatory = true;
		break;
J
Jon Maloy 已提交
2897 2898 2899 2900 2901 2902
	case TIPC_GROUP_JOIN:
		res = tipc_sk_join(tsk, &mreq);
		break;
	case TIPC_GROUP_LEAVE:
		res = tipc_sk_leave(tsk);
		break;
P
Per Liden 已提交
2903 2904 2905 2906
	default:
		res = -EINVAL;
	}

2907 2908
	release_sock(sk);

P
Per Liden 已提交
2909 2910 2911 2912
	return res;
}

/**
 * tipc_getsockopt - get socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: receptacle for option value
 * @ol: receptacle for length of option value
 *
 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, int __user *ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_name_seq seq;
	int len, scope;
	u32 value;
	int res;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return put_user(0, ol);
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
	res = get_user(len, ol);
	if (res)
		return res;

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		value = tsk_importance(tsk);
		break;
	case TIPC_SRC_DROPPABLE:
		value = tsk_unreliable(tsk);
		break;
	case TIPC_DEST_DROPPABLE:
		value = tsk_unreturnable(tsk);
		break;
	case TIPC_CONN_TIMEOUT:
		value = tsk->conn_timeout;
		/* no need to set "res", since already 0 at this point */
		break;
	case TIPC_NODE_RECVQ_DEPTH:
		value = 0; /* was tipc_queue_size, now obsolete */
		break;
	case TIPC_SOCK_RECVQ_DEPTH:
		value = skb_queue_len(&sk->sk_receive_queue);
		break;
	case TIPC_GROUP_JOIN:
		/* Report the joined group's type, or 0 when not a member */
		seq.type = 0;
		if (tsk->group)
			tipc_group_self(tsk->group, &seq, &scope);
		value = seq.type;
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	if (res)
		return res;	/* "get" failed */

	if (len < sizeof(value))
		return -EINVAL;

	if (copy_to_user(ov, &value, sizeof(value)))
		return -EFAULT;

	return put_user(sizeof(value), ol);
}

2989
static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
E
Erik Hugne 已提交
2990
{
2991 2992
	struct net *net = sock_net(sock->sk);
	struct tipc_sioc_nodeid_req nr = {0};
E
Erik Hugne 已提交
2993 2994 2995 2996 2997 2998 2999
	struct tipc_sioc_ln_req lnr;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case SIOCGETLINKNAME:
		if (copy_from_user(&lnr, argp, sizeof(lnr)))
			return -EFAULT;
3000
		if (!tipc_node_get_linkname(net,
3001
					    lnr.bearer_id & 0xffff, lnr.peer,
E
Erik Hugne 已提交
3002 3003 3004 3005 3006 3007
					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
			if (copy_to_user(argp, &lnr, sizeof(lnr)))
				return -EFAULT;
			return 0;
		}
		return -EADDRNOTAVAIL;
3008 3009 3010 3011 3012 3013 3014 3015
	case SIOCGETNODEID:
		if (copy_from_user(&nr, argp, sizeof(nr)))
			return -EFAULT;
		if (!tipc_node_get_id(net, nr.peer, nr.node_id))
			return -EADDRNOTAVAIL;
		if (copy_to_user(argp, &nr, sizeof(nr)))
			return -EFAULT;
		return 0;
E
Erik Hugne 已提交
3016 3017 3018 3019 3020
	default:
		return -ENOIOCTLCMD;
	}
}

3021 3022 3023 3024
static int tipc_socketpair(struct socket *sock1, struct socket *sock2)
{
	struct tipc_sock *tsk2 = tipc_sk(sock2->sk);
	struct tipc_sock *tsk1 = tipc_sk(sock1->sk);
E
Erik Hugne 已提交
3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039
	u32 onode = tipc_own_addr(sock_net(sock1->sk));

	tsk1->peer.family = AF_TIPC;
	tsk1->peer.addrtype = TIPC_ADDR_ID;
	tsk1->peer.scope = TIPC_NODE_SCOPE;
	tsk1->peer.addr.id.ref = tsk2->portid;
	tsk1->peer.addr.id.node = onode;
	tsk2->peer.family = AF_TIPC;
	tsk2->peer.addrtype = TIPC_ADDR_ID;
	tsk2->peer.scope = TIPC_NODE_SCOPE;
	tsk2->peer.addr.id.ref = tsk1->portid;
	tsk2->peer.addr.id.node = onode;

	tipc_sk_finish_conn(tsk1, tsk2->portid, onode);
	tipc_sk_finish_conn(tsk2, tsk1->portid, onode);
3040 3041 3042
	return 0;
}

3043 3044
/* Protocol switches for the various types of TIPC sockets */

3045
/* Message-based (connectionless) TIPC sockets: accept() and listen()
 * are rejected via the sock_no_* stubs; datagrams go through
 * tipc_sendmsg()/tipc_recvmsg().
 */
static const struct proto_ops msg_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= tipc_socketpair,
	.accept		= sock_no_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendmsg,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

3066
/* Connection-oriented packet (record-preserving) TIPC sockets:
 * full accept()/listen() support, sends via tipc_send_packet().
 */
static const struct proto_ops packet_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= tipc_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_packet,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

3087
/* Byte-stream TIPC sockets: accept()/listen() supported, I/O goes
 * through the stream-oriented tipc_sendstream()/tipc_recvstream().
 */
static const struct proto_ops stream_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= tipc_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendstream,
	.recvmsg	= tipc_recvstream,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

3108
/* Address-family registration: routes socket(AF_TIPC, ...) calls to
 * tipc_sk_create().
 */
static const struct net_proto_family tipc_family_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.create		= tipc_sk_create
};

/* Core protocol descriptor: each sock is allocated as a full
 * struct tipc_sock, with receive-memory limits from sysctl_tipc_rmem.
 */
static struct proto tipc_proto = {
	.name		= "TIPC",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct tipc_sock),
	.sysctl_rmem	= sysctl_tipc_rmem
};

/**
3122
 * tipc_socket_init - initialize TIPC socket interface
3123
 *
P
Per Liden 已提交
3124 3125
 * Returns 0 on success, errno otherwise
 */
3126
int tipc_socket_init(void)
P
Per Liden 已提交
3127 3128 3129
{
	int res;

3130
	res = proto_register(&tipc_proto, 1);
P
Per Liden 已提交
3131
	if (res) {
3132
		pr_err("Failed to register TIPC protocol type\n");
P
Per Liden 已提交
3133 3134 3135 3136 3137
		goto out;
	}

	res = sock_register(&tipc_family_ops);
	if (res) {
3138
		pr_err("Failed to register TIPC socket type\n");
P
Per Liden 已提交
3139 3140 3141 3142 3143 3144 3145 3146
		proto_unregister(&tipc_proto);
		goto out;
	}
 out:
	return res;
}

/**
 * tipc_socket_stop - stop TIPC socket interface
 *
 * Reverse of tipc_socket_init(): unregister the address family first,
 * then the protocol.
 */
void tipc_socket_stop(void)
{
	sock_unregister(tipc_family_ops.family);
	proto_unregister(&tipc_proto);
}
3154 3155

/* Caller should hold socket lock for the passed tipc socket. */
3156
static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189
{
	u32 peer_node;
	u32 peer_port;
	struct nlattr *nest;

	peer_node = tsk_peer_node(tsk);
	peer_port = tsk_peer_port(tsk);

	nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);

	if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
		goto msg_full;
	if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
		goto msg_full;

	if (tsk->conn_type != 0) {
		if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
			goto msg_full;
	}
	nla_nest_end(skb, nest);

	return 0;

msg_full:
	nla_nest_cancel(skb, nest);

	return -EMSGSIZE;
}

3190 3191 3192 3193 3194 3195 3196
static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock
			  *tsk)
{
	struct net *net = sock_net(skb->sk);
	struct sock *sk = &tsk->sk;

	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) ||
3197
	    nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net)))
3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209
		return -EMSGSIZE;

	if (tipc_sk_connected(sk)) {
		if (__tipc_nl_add_sk_con(skb, tsk))
			return -EMSGSIZE;
	} else if (!list_empty(&tsk->publications)) {
		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
			return -EMSGSIZE;
	}
	return 0;
}

3210
/* Emit one TIPC_NL_SOCK_GET multipart record for @tsk into @skb.
 * On any failure the partially-built message is unwound via the goto
 * chain and -EMSGSIZE is returned so the dump can resume in a new skb.
 *
 * Caller should hold socket lock for the passed tipc socket.
 */
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tipc_sock *tsk)
{
	struct nlattr *attrs;
	void *hdr;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto genlmsg_cancel;

	if (__tipc_nl_add_sk_info(skb, tsk))
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

3242 3243 3244 3245
/* Walk all TIPC sockets via the rhashtable iterator stashed in
 * cb->args[4] (set up by __tipc_dump_start()), invoking @skb_handler
 * on each under the socket lock.  Returns skb->len so the netlink core
 * keeps calling back until the dump is complete.
 */
int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
		    int (*skb_handler)(struct sk_buff *skb,
				       struct netlink_callback *cb,
				       struct tipc_sock *tsk))
{
	struct rhashtable_iter *iter = (void *)cb->args[4];
	struct tipc_sock *tsk;
	int err;

	rhashtable_walk_start(iter);
	while ((tsk = rhashtable_walk_next(iter)) != NULL) {
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			/* -EAGAIN: iteration must restart (NOTE(review):
			 * presumably a concurrent table resize — confirm
			 * against rhashtable_walk_next() docs); any other
			 * error ends the walk.
			 */
			if (err == -EAGAIN) {
				err = 0;
				continue;
			}
			break;
		}

		/* Pin the socket, then stop the walk before taking the
		 * (possibly sleeping) socket lock; the walk is restarted
		 * afterwards.  This exact ordering is load-bearing.
		 */
		sock_hold(&tsk->sk);
		rhashtable_walk_stop(iter);
		lock_sock(&tsk->sk);
		err = skb_handler(skb, cb, tsk);
		if (err) {
			/* Handler ran out of skb space (or failed):
			 * drop lock/ref and resume on the next callback.
			 */
			release_sock(&tsk->sk);
			sock_put(&tsk->sk);
			goto out;
		}
		release_sock(&tsk->sk);
		rhashtable_walk_start(iter);
		sock_put(&tsk->sk);
	}
	rhashtable_walk_stop(iter);
out:
	return skb->len;
}
EXPORT_SYMBOL(tipc_nl_sk_walk);

C
Cong Wang 已提交
3281 3282
/* Convenience wrapper: start a socket dump in the netns owning the
 * requesting netlink socket.
 */
int tipc_dump_start(struct netlink_callback *cb)
{
	return __tipc_dump_start(cb, sock_net(cb->skb->sk));
}
EXPORT_SYMBOL(tipc_dump_start);

/* Prepare an rhashtable walk over the netns' socket table.  The
 * iterator is allocated on the first invocation and cached in
 * cb->args[4] for resumed dumps; tipc_dump_done() frees it.
 */
int __tipc_dump_start(struct netlink_callback *cb, struct net *net)
{
	/* tipc_nl_name_table_dump() uses cb->args[0...3]. */
	struct rhashtable_iter *iter = (void *)cb->args[4];
	struct tipc_net *tn = tipc_net(net);

	if (!iter) {
		iter = kmalloc(sizeof(*iter), GFP_KERNEL);
		if (!iter)
			return -ENOMEM;

		cb->args[4] = (long)iter;
	}

	rhashtable_walk_enter(&tn->sk_rht, iter);
	return 0;
}

int tipc_dump_done(struct netlink_callback *cb)
{
3307
	struct rhashtable_iter *hti = (void *)cb->args[4];
C
Cong Wang 已提交
3308 3309 3310 3311 3312 3313 3314

	rhashtable_walk_exit(hti);
	kfree(hti);
	return 0;
}
EXPORT_SYMBOL(tipc_dump_done);

3315 3316
/* Fill one sock_diag record for @tsk.
 * @sk_filter_state: bitmask of sk_state values the caller wants dumped;
 *	sockets whose state bit is clear are silently skipped (return 0).
 * @tipc_diag_gen_cookie: caller-supplied generator for the 64-bit
 *	socket cookie attribute.
 * Returns 0 on success (or skip), -EMSGSIZE when @skb is full.
 */
int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
			   struct tipc_sock *tsk, u32 sk_filter_state,
			   u64 (*tipc_diag_gen_cookie)(struct sock *sk))
{
	struct sock *sk = &tsk->sk;
	struct nlattr *attrs;
	struct nlattr *stat;

	/*filter response w.r.t sk_state*/
	if (!(sk_filter_state & (1 << sk->sk_state)))
		return 0;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto msg_cancel;

	if (__tipc_nl_add_sk_info(skb, tsk))
		goto attr_msg_cancel;

	/* UID is translated into the namespace of the diag requester */
	if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_UID,
			from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk),
					 sock_i_uid(sk))) ||
	    nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE,
			      tipc_diag_gen_cookie(sk),
			      TIPC_NLA_SOCK_PAD))
		goto attr_msg_cancel;

	/* Nested queue/congestion statistics */
	stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT);
	if (!stat)
		goto attr_msg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ,
			skb_queue_len(&sk->sk_receive_queue)) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ,
			skb_queue_len(&sk->sk_write_queue)) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP,
			atomic_read(&sk->sk_drops)))
		goto stat_msg_cancel;

	if (tsk->cong_link_cnt &&
	    nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG))
		goto stat_msg_cancel;

	if (tsk_conn_cong(tsk) &&
	    nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG))
		goto stat_msg_cancel;

	nla_nest_end(skb, stat);

	/* Group membership details, when the socket has joined one */
	if (tsk->group)
		if (tipc_group_fill_sock_diag(tsk->group, skb))
			goto stat_msg_cancel;

	nla_nest_end(skb, attrs);

	return 0;

stat_msg_cancel:
	nla_nest_cancel(skb, stat);
attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
msg_cancel:
	return -EMSGSIZE;
}
EXPORT_SYMBOL(tipc_sk_fill_sock_diag);
3383

3384 3385
/* Netlink dump callback: emit one TIPC_NL_SOCK_GET record per socket */
int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk);
}

3389
/* Caller should hold socket lock for the passed tipc socket. */
3390 3391 3392
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 struct publication *publ)
3393 3394 3395 3396 3397
{
	void *hdr;
	struct nlattr *attrs;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3398
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET);
3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_PUBL);
	if (!attrs)
		goto genlmsg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
3429 3430 3431
static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
				  struct netlink_callback *cb,
				  struct tipc_sock *tsk, u32 *last_publ)
3432 3433 3434 3435 3436
{
	int err;
	struct publication *p;

	if (*last_publ) {
J
Jon Maloy 已提交
3437
		list_for_each_entry(p, &tsk->publications, binding_sock) {
3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453
			if (p->key == *last_publ)
				break;
		}
		if (p->key != *last_publ) {
			/* We never set seq or call nl_dump_check_consistent()
			 * this means that setting prev_seq here will cause the
			 * consistence check to fail in the netlink callback
			 * handler. Resulting in the last NLMSG_DONE message
			 * having the NLM_F_DUMP_INTR flag set.
			 */
			cb->prev_seq = 1;
			*last_publ = 0;
			return -EPIPE;
		}
	} else {
		p = list_first_entry(&tsk->publications, struct publication,
J
Jon Maloy 已提交
3454
				     binding_sock);
3455 3456
	}

J
Jon Maloy 已提交
3457
	list_for_each_entry_from(p, &tsk->publications, binding_sock) {
3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471
		err = __tipc_nl_add_sk_publ(skb, cb, p);
		if (err) {
			*last_publ = p->key;
			return err;
		}
	}
	*last_publ = 0;

	return 0;
}

/* Netlink dump of all publications bound to one socket.
 * Resume state lives in the callback: cb->args[0] = target port id,
 * cb->args[1] = key of last dumped publication, cb->args[2] = done.
 */
int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	u32 tsk_portid = cb->args[0];
	u32 last_publ = cb->args[1];
	u32 done = cb->args[2];
	struct net *net = sock_net(skb->sk);
	struct tipc_sock *tsk;

	if (!tsk_portid) {
		/* First invocation: extract the target socket reference
		 * from the request's nested TIPC_NLA_SOCK attributes.
		 */
		struct nlattr **attrs;
		struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];

		err = tipc_nlmsg_parse(cb->nlh, &attrs);
		if (err)
			return err;

		if (!attrs[TIPC_NLA_SOCK])
			return -EINVAL;

		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
				       attrs[TIPC_NLA_SOCK],
				       tipc_nl_sock_policy, NULL);
		if (err)
			return err;

		if (!sock[TIPC_NLA_SOCK_REF])
			return -EINVAL;

		tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]);
	}

	if (done)
		return 0;

	/* tipc_sk_lookup() takes a reference; dropped below */
	tsk = tipc_sk_lookup(net, tsk_portid);
	if (!tsk)
		return -EINVAL;

	lock_sock(&tsk->sk);
	err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ);
	if (!err)
		done = 1;
	release_sock(&tsk->sk);
	sock_put(&tsk->sk);

	/* Persist resume state for the next callback round */
	cb->args[0] = tsk_portid;
	cb->args[1] = last_publ;
	cb->args[2] = done;

	return skb->len;
}