socket.c 90.6 KB
Newer Older
P
Per Liden 已提交
1
/*
2
 * net/tipc/socket.c: TIPC socket API
3
 *
J
Jon Maloy 已提交
4
 * Copyright (c) 2001-2007, 2012-2017, Ericsson AB
5
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
P
Per Liden 已提交
6 7
 * All rights reserved.
 *
P
Per Liden 已提交
8
 * Redistribution and use in source and binary forms, with or without
P
Per Liden 已提交
9 10
 * modification, are permitted provided that the following conditions are met:
 *
P
Per Liden 已提交
11 12 13 14 15 16 17 18
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
P
Per Liden 已提交
19
 *
P
Per Liden 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
P
Per Liden 已提交
34 35 36
 * POSSIBILITY OF SUCH DAMAGE.
 */

37
#include <linux/rhashtable.h>
38 39
#include <linux/sched/signal.h>

P
Per Liden 已提交
40
#include "core.h"
41
#include "name_table.h"
E
Erik Hugne 已提交
42
#include "node.h"
43
#include "link.h"
44
#include "name_distr.h"
45
#include "socket.h"
46
#include "bcast.h"
47
#include "netlink.h"
J
Jon Maloy 已提交
48
#include "group.h"
49

50
/* File-local tunables and limits for TIPC sockets */
#define CONN_TIMEOUT_DEFAULT    8000    /* default connect timeout = 8s */
#define CONN_PROBING_INTV	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
#define TIPC_FWD_MSG		1
#define TIPC_MAX_PORT		0xffffffff
#define TIPC_MIN_PORT		1
#define TIPC_ACK_RATE		4       /* ACK at 1/4 of rcv window size */
56

57 58
/* TIPC socket states, stored in sk->sk_state.
 * Each state is an alias of one of the TCP state constants.
 */
enum {
	TIPC_LISTEN = TCP_LISTEN,
	TIPC_ESTABLISHED = TCP_ESTABLISHED,
	TIPC_OPEN = TCP_CLOSE,
	TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
	TIPC_CONNECTING = TCP_SYN_SENT,
};

65 66 67 68 69
/* struct sockaddr_pair - two TIPC socket addresses kept together
 * @sock: first address
 * @member: second address
 *
 * NOTE(review): presumably @sock identifies a sending socket and @member a
 * group member identity; the users are outside this part of the file - confirm.
 */
struct sockaddr_pair {
	struct sockaddr_tipc sock;
	struct sockaddr_tipc member;
};

70 71 72 73 74 75 76
/**
 * struct tipc_sock - TIPC socket structure
 * @sk: socket - interacts with 'port' and with user via the socket API
 * @conn_type: TIPC type used when connection was established
 * @conn_instance: TIPC instance used when connection was established
 * @published: non-zero if port has one or more associated names
 * @max_pkt: maximum packet size "hint" used when building messages sent by port
 * @portid: unique port identity in TIPC socket hash table
 * @phdr: preformatted message header used when sending messages
 * @cong_links: list of congested links
 * @publications: list of publications for port
 * @pub_count: total # of publications port has made during its lifetime
 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
 * @conn_timeout: the time we can wait for an unresponded setup request
 * @probe_unacked: NOTE(review): presumably set while a connection probe is
 *                 awaiting its reply; not used in this part of the file
 * @cong_link_cnt: number of congested links
 * @snt_unacked: # messages sent by socket, and not yet acked by peer
 * @snd_win: send window; the connection counts as congested once
 *           @snt_unacked exceeds it
 * @peer_caps: peer capability flags (tested against TIPC_BLOCK_FLOWCTL)
 * @rcv_unacked: # messages read by user, but not yet acked back to peer
 * @rcv_win: receive window; initialised to the same value as @snd_win
 * @peer: 'connected' peer for dgram/rdm
 * @node: hash table node
 * @mc_method: cookie for use between socket and broadcast layer
 * @rcu: rcu struct for tipc_sock
 * @group: communication group the socket has joined, or NULL
 * @group_is_open: set to true at socket creation; polled for writability
 *                 while the socket is in state TIPC_OPEN
 */
struct tipc_sock {
	struct sock sk;
	u32 conn_type;
	u32 conn_instance;
	int published;
	u32 max_pkt;
	u32 portid;
	struct tipc_msg phdr;
	struct list_head cong_links;
	struct list_head publications;
	u32 pub_count;
	atomic_t dupl_rcvcnt;
	u16 conn_timeout;
	bool probe_unacked;
	u16 cong_link_cnt;
	u16 snt_unacked;
	u16 snd_win;
	u16 peer_caps;
	u16 rcv_unacked;
	u16 rcv_win;
	struct sockaddr_tipc peer;
	struct rhash_head node;
	struct tipc_mc_method mc_method;
	struct rcu_head rcu;
	struct tipc_group *group;
	bool group_is_open;
};
P
Per Liden 已提交
120

J
Jon Maloy 已提交
121
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
122
static void tipc_data_ready(struct sock *sk);
123
static void tipc_write_space(struct sock *sk);
124
static void tipc_sock_destruct(struct sock *sk);
125
static int tipc_release(struct socket *sock);
126 127
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
		       bool kern);
128
static void tipc_sk_timeout(struct timer_list *t);
129
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
130
			   struct tipc_name_seq const *seq);
131
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
132
			    struct tipc_name_seq const *seq);
J
Jon Maloy 已提交
133
static int tipc_sk_leave(struct tipc_sock *tsk);
134
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
135 136
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
137
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
138
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
P
Per Liden 已提交
139

140 141 142
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
P
Per Liden 已提交
143
static struct proto tipc_proto;
144 145
static const struct rhashtable_params tsk_rht_params;

146 147 148 149 150
/* tsk_own_node - read the 'previous node' field of the socket's
 * preformatted header, which this file uses as the socket's own node address
 */
static u32 tsk_own_node(struct tipc_sock *tsk)
{
	struct tipc_msg *hdr = &tsk->phdr;

	return msg_prevnode(hdr);
}

151
static u32 tsk_peer_node(struct tipc_sock *tsk)
152
{
153
	return msg_destnode(&tsk->phdr);
154 155
}

156
static u32 tsk_peer_port(struct tipc_sock *tsk)
157
{
158
	return msg_destport(&tsk->phdr);
159 160
}

161
static  bool tsk_unreliable(struct tipc_sock *tsk)
162
{
163
	return msg_src_droppable(&tsk->phdr) != 0;
164 165
}

166
/* tsk_set_unreliable - set (1) or clear (0) the 'source droppable' bit
 * in the socket's preformatted header
 */
static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
{
	int bit = 0;

	if (unreliable)
		bit = 1;
	msg_set_src_droppable(&tsk->phdr, bit);
}

171
static bool tsk_unreturnable(struct tipc_sock *tsk)
172
{
173
	return msg_dest_droppable(&tsk->phdr) != 0;
174 175
}

176
/* tsk_set_unreturnable - set (1) or clear (0) the 'destination droppable'
 * bit in the socket's preformatted header
 */
static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
{
	int bit = 0;

	if (unreturnable)
		bit = 1;
	msg_set_dest_droppable(&tsk->phdr, bit);
}

181
static int tsk_importance(struct tipc_sock *tsk)
182
{
183
	return msg_importance(&tsk->phdr);
184 185
}

186
static int tsk_set_importance(struct tipc_sock *tsk, int imp)
187 188 189
{
	if (imp > TIPC_CRITICAL_IMPORTANCE)
		return -EINVAL;
190
	msg_set_importance(&tsk->phdr, (u32)imp);
191 192
	return 0;
}
193

194 195 196 197 198
/* tipc_sk - map a generic sock to the tipc_sock that embeds it */
static struct tipc_sock *tipc_sk(const struct sock *sk)
{
	struct tipc_sock *tsk = container_of(sk, struct tipc_sock, sk);

	return tsk;
}

199
static bool tsk_conn_cong(struct tipc_sock *tsk)
200
{
201
	return tsk->snt_unacked > tsk->snd_win;
202 203
}

204 205 206 207 208
/* tsk_blocks(): number of flow control blocks charged for @len bytes:
 * the count of whole FLOWCTL_BLK_SZ blocks, plus one
 */
static u16 tsk_blocks(int len)
{
	int whole_blks = len / FLOWCTL_BLK_SZ;

	return whole_blks + 1;
}

209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
/* tsk_adv_blocks(): translate a buffer size in bytes to number of
 * advertisable blocks, taking into account the ratio truesize(len)/len
 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
 */
static u16 tsk_adv_blocks(int len)
{
	int whole_blks = len / FLOWCTL_BLK_SZ;

	return whole_blks / 4;
}

/* tsk_inc(): amount by which to advance a sent/received counter
 * - With block based flow control (TIPC_BLOCK_FLOWCTL in peer_caps) the
 *   message is charged in blocks, computed as in tsk_blocks()
 * - Otherwise we fall back to message based ditto and charge 1
 */
static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
{
	if (unlikely(!(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)))
		return 1;

	return tsk_blocks(msglen);
}

229
/**
230
 * tsk_advance_rx_queue - discard first buffer in socket receive queue
231 232
 *
 * Caller must hold socket lock
P
Per Liden 已提交
233
 */
234
static void tsk_advance_rx_queue(struct sock *sk)
P
Per Liden 已提交
235
{
236
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
P
Per Liden 已提交
237 238
}

239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
/* tipc_sk_respond() : send response message back to sender
 * @sk: socket the message arrived on
 * @skb: received buffer; handed by address to tipc_msg_reverse()
 * @err: error code passed on to tipc_msg_reverse()
 *
 * Nothing is sent when tipc_msg_reverse() returns false. Otherwise the
 * buffer is transmitted to the destination node found in its header,
 * with the header's origin port as link selector.
 */
static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
{
	struct net *net = sock_net(sk);
	u32 own_node = tipc_own_addr(net);
	struct tipc_msg *hdr;

	if (!tipc_msg_reverse(own_node, &skb, err))
		return;

	hdr = buf_msg(skb);
	tipc_node_xmit_skb(net, skb, msg_destnode(hdr), msg_origport(hdr));
}

P
Per Liden 已提交
255
/**
256
 * tsk_rej_rx_queue - reject all buffers in socket receive queue
257 258
 *
 * Caller must hold socket lock
P
Per Liden 已提交
259
 */
260
static void tsk_rej_rx_queue(struct sock *sk)
P
Per Liden 已提交
261
{
262
	struct sk_buff *skb;
263

264 265
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
		tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
P
Per Liden 已提交
266 267
}

268 269
static bool tipc_sk_connected(struct sock *sk)
{
270
	return sk->sk_state == TIPC_ESTABLISHED;
271 272
}

273 274 275 276 277 278 279 280 281 282
/* tipc_sk_type_connectionless - check if the socket is datagram socket
 * @sk: socket
 *
 * Returns true if connection less, false otherwise
 */
static bool tipc_sk_type_connectionless(struct sock *sk)
{
	return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM;
}

283
/* tsk_peer_msg - verify if message was sent by connected port's peer
J
Jon Paul Maloy 已提交
284 285 286 287
 *
 * Handles cases where the node's network address has changed from
 * the default of <0.0.0> to its configured setting.
 */
288
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
J
Jon Paul Maloy 已提交
289
{
290
	struct sock *sk = &tsk->sk;
291
	u32 self = tipc_own_addr(sock_net(sk));
292
	u32 peer_port = tsk_peer_port(tsk);
293
	u32 orig_node, peer_node;
J
Jon Paul Maloy 已提交
294

295
	if (unlikely(!tipc_sk_connected(sk)))
J
Jon Paul Maloy 已提交
296 297 298 299 300 301
		return false;

	if (unlikely(msg_origport(msg) != peer_port))
		return false;

	orig_node = msg_orignode(msg);
302
	peer_node = tsk_peer_node(tsk);
J
Jon Paul Maloy 已提交
303 304 305 306

	if (likely(orig_node == peer_node))
		return true;

307
	if (!orig_node && peer_node == self)
J
Jon Paul Maloy 已提交
308 309
		return true;

310
	if (!peer_node && orig_node == self)
J
Jon Paul Maloy 已提交
311 312 313 314 315
		return true;

	return false;
}

316 317 318 319 320 321 322 323 324
/* tipc_set_sk_state - set the sk_state of the socket
 * @sk: socket
 *
 * Caller must hold socket lock
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_set_sk_state(struct sock *sk, int state)
{
325
	int oldsk_state = sk->sk_state;
326 327 328
	int res = -EINVAL;

	switch (state) {
329 330 331
	case TIPC_OPEN:
		res = 0;
		break;
332
	case TIPC_LISTEN:
333
	case TIPC_CONNECTING:
334
		if (oldsk_state == TIPC_OPEN)
335 336
			res = 0;
		break;
337
	case TIPC_ESTABLISHED:
338
		if (oldsk_state == TIPC_CONNECTING ||
339
		    oldsk_state == TIPC_OPEN)
340 341
			res = 0;
		break;
342
	case TIPC_DISCONNECTING:
343
		if (oldsk_state == TIPC_CONNECTING ||
344 345 346
		    oldsk_state == TIPC_ESTABLISHED)
			res = 0;
		break;
347 348 349 350 351 352 353 354
	}

	if (!res)
		sk->sk_state = state;

	return res;
}

355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376
static int tipc_sk_sock_err(struct socket *sock, long *timeout)
{
	struct sock *sk = sock->sk;
	int err = sock_error(sk);
	int typ = sock->type;

	if (err)
		return err;
	if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) {
		if (sk->sk_state == TIPC_DISCONNECTING)
			return -EPIPE;
		else if (!tipc_sk_connected(sk))
			return -ENOTCONN;
	}
	if (!*timeout)
		return -EAGAIN;
	if (signal_pending(current))
		return sock_intr_errno(*timeout);

	return 0;
}

377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
/* tipc_wait_for_cond - sleep until a condition holds, or waiting must stop
 * @sock_: socket structure; its sock must be locked by the caller
 * @timeo_: pointer to the remaining timeout, updated after every sleep
 * @condition_: expression re-evaluated, with the socket lock held, before
 *              every sleep
 *
 * The socket lock is released while sleeping and re-taken afterwards.
 * Evaluates to 0 once @condition_ is true, or to the error reported by
 * tipc_sk_sock_err().
 *
 * The wait entry uses woken_wake_function() and wait_woken(), so it is
 * queued with add_wait_queue(): unlike prepare_to_wait() this leaves the
 * task state alone until wait_woken() itself puts the task to sleep.
 */
#define tipc_wait_for_cond(sock_, timeo_, condition_)			       \
({                                                                             \
	struct sock *sk_;						       \
	int rc_;							       \
									       \
	while ((rc_ = !(condition_))) {					       \
		DEFINE_WAIT_FUNC(wait_, woken_wake_function);	               \
		sk_ = (sock_)->sk;					       \
		rc_ = tipc_sk_sock_err((sock_), timeo_);		       \
		if (rc_)						       \
			break;						       \
		add_wait_queue(sk_sleep(sk_), &wait_);			       \
		release_sock(sk_);					       \
		*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
		sched_annotate_sleep();				               \
		lock_sock(sk_);						       \
		remove_wait_queue(sk_sleep(sk_), &wait_);		       \
	}								       \
	rc_;								       \
})

P
Per Liden 已提交
398
/**
399
 * tipc_sk_create - create a TIPC socket
400
 * @net: network namespace (must be default network)
P
Per Liden 已提交
401 402
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
403
 * @kern: caused by kernel or by userspace?
404
 *
405 406
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
P
Per Liden 已提交
407 408 409
 *
 * Returns 0 on success, errno otherwise
 */
410 411
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
P
Per Liden 已提交
412
{
413
	const struct proto_ops *ops;
P
Per Liden 已提交
414
	struct sock *sk;
415
	struct tipc_sock *tsk;
416
	struct tipc_msg *msg;
417 418

	/* Validate arguments */
P
Per Liden 已提交
419 420 421 422 423
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	switch (sock->type) {
	case SOCK_STREAM:
424
		ops = &stream_ops;
P
Per Liden 已提交
425 426
		break;
	case SOCK_SEQPACKET:
427
		ops = &packet_ops;
P
Per Liden 已提交
428 429 430
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
431
		ops = &msg_ops;
P
Per Liden 已提交
432
		break;
433 434
	default:
		return -EPROTOTYPE;
P
Per Liden 已提交
435 436
	}

437
	/* Allocate socket's protocol area */
438
	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
439
	if (sk == NULL)
P
Per Liden 已提交
440 441
		return -ENOMEM;

442
	tsk = tipc_sk(sk);
443 444
	tsk->max_pkt = MAX_PKT_DEFAULT;
	INIT_LIST_HEAD(&tsk->publications);
445
	INIT_LIST_HEAD(&tsk->cong_links);
446
	msg = &tsk->phdr;
P
Per Liden 已提交
447

448 449 450
	/* Finish initializing socket data structures */
	sock->ops = ops;
	sock_init_data(sock, sk);
451
	tipc_set_sk_state(sk, TIPC_OPEN);
452
	if (tipc_sk_insert(tsk)) {
M
Masanari Iida 已提交
453
		pr_warn("Socket create failed; port number exhausted\n");
454 455
		return -EINVAL;
	}
456 457 458 459

	/* Ensure tsk is visible before we read own_addr. */
	smp_mb();

460 461
	tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE,
		      TIPC_NAMED_MSG, NAMED_H_SIZE, 0);
462

463
	msg_set_origport(msg, tsk->portid);
464
	timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
465
	sk->sk_shutdown = 0;
J
Jon Maloy 已提交
466
	sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
467
	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
468 469
	sk->sk_data_ready = tipc_data_ready;
	sk->sk_write_space = tipc_write_space;
470
	sk->sk_destruct = tipc_sock_destruct;
471
	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
472
	tsk->group_is_open = true;
473
	atomic_set(&tsk->dupl_rcvcnt, 0);
474

475 476 477 478
	/* Start out with safe limits until we receive an advertised window */
	tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
	tsk->rcv_win = tsk->snd_win;

479
	if (tipc_sk_type_connectionless(sk)) {
480
		tsk_set_unreturnable(tsk, true);
481
		if (sock->type == SOCK_DGRAM)
482
			tsk_set_unreliable(tsk, true);
483
	}
484

P
Per Liden 已提交
485 486 487
	return 0;
}

488 489 490 491 492 493 494
/* tipc_sk_callback - RCU callback queued by tipc_release(); drops one
 * reference on the sock embedding @head
 */
static void tipc_sk_callback(struct rcu_head *head)
{
	struct tipc_sock *tsk;

	tsk = container_of(head, struct tipc_sock, rcu);
	sock_put(&tsk->sk);
}

495 496 497 498 499 500
/* Caller should hold socket lock for the socket. */
static void __tipc_shutdown(struct socket *sock, int error)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
501
	long timeout = CONN_TIMEOUT_DEFAULT;
502 503 504
	u32 dnode = tsk_peer_node(tsk);
	struct sk_buff *skb;

505 506 507 508
	/* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */
	tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
					    !tsk_conn_cong(tsk)));

509 510 511
	/* Remove any pending SYN message */
	__skb_queue_purge(&sk->sk_write_queue);

512 513 514 515 516 517
	/* Reject all unreceived messages, except on an active connection
	 * (which disconnects locally & sends a 'FIN+' to peer).
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (TIPC_SKB_CB(skb)->bytes_read) {
			kfree_skb(skb);
518
			continue;
519
		}
520 521 522 523 524 525
		if (!tipc_sk_type_connectionless(sk) &&
		    sk->sk_state != TIPC_DISCONNECTING) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			tipc_node_remove_conn(net, dnode, tsk->portid);
		}
		tipc_sk_respond(sk, skb, error);
526
	}
527 528 529 530

	if (tipc_sk_type_connectionless(sk))
		return;

531 532 533 534 535 536 537
	if (sk->sk_state != TIPC_DISCONNECTING) {
		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
				      TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
				      tsk_own_node(tsk), tsk_peer_port(tsk),
				      tsk->portid, error);
		if (skb)
			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
538 539
		tipc_node_remove_conn(net, dnode, tsk->portid);
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
540 541 542
	}
}

P
Per Liden 已提交
543
/**
 * tipc_release - destroy a TIPC socket
 * @sock: socket to destroy
 *
 * This routine cleans up any messages that are still queued on the socket.
 * For DGRAM and RDM socket types, all queued messages are rejected.
 * For SEQPACKET and STREAM socket types, the first message is rejected
 * and any others are discarded.  (If the first message on a STREAM socket
 * is partially-read, it is discarded and the next one is rejected instead.)
 *
 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 * are returned or discarded according to the "destination droppable" setting
 * specified for the message by the sender.
 *
 * Returns 0; no failure path exists in this routine
 */
static int tipc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk;

	/*
	 * Exit if socket isn't fully initialized (occurs when a failed accept()
	 * releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
		return 0;

	tsk = tipc_sk(sk);
	lock_sock(sk);

	/* Shut down, then undo group membership, name bindings, the timer
	 * and the socket's registration, all under the socket lock
	 */
	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
	sk->sk_shutdown = SHUTDOWN_MASK;
	tipc_sk_leave(tsk);
	tipc_sk_withdraw(tsk, 0, NULL);
	sk_stop_timer(sk, &sk->sk_timer);
	tipc_sk_remove(tsk);

	sock_orphan(sk);
	/* Reject any messages that accumulated in backlog queue */
	release_sock(sk);
	tipc_dest_list_purge(&tsk->cong_links);
	tsk->cong_link_cnt = 0;
	/* The reference is dropped from tipc_sk_callback(), via call_rcu() */
	call_rcu(&tsk->rcu, tipc_sk_callback);
	sock->sk = NULL;

	return 0;
}

/**
593
 * tipc_bind - associate or disassocate TIPC name(s) with a socket
P
Per Liden 已提交
594 595 596
 * @sock: socket structure
 * @uaddr: socket address describing name(s) and desired operation
 * @uaddr_len: size of socket address data structure
597
 *
P
Per Liden 已提交
598 599 600
 * Name and name sequence binding is indicated using a positive scope value;
 * a negative scope value unbinds the specified name.  Specifying no name
 * (i.e. a socket address length of 0) unbinds all names from the socket.
601
 *
P
Per Liden 已提交
602
 * Returns 0 on success, errno otherwise
603 604 605
 *
 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 *       access any non-constant socket information.
P
Per Liden 已提交
606
 */
607 608
static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
		     int uaddr_len)
P
Per Liden 已提交
609
{
610
	struct sock *sk = sock->sk;
P
Per Liden 已提交
611
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
612
	struct tipc_sock *tsk = tipc_sk(sk);
613
	int res = -EINVAL;
P
Per Liden 已提交
614

615 616
	lock_sock(sk);
	if (unlikely(!uaddr_len)) {
617
		res = tipc_sk_withdraw(tsk, 0, NULL);
618 619
		goto exit;
	}
J
Jon Maloy 已提交
620 621 622 623
	if (tsk->group) {
		res = -EACCES;
		goto exit;
	}
624 625 626 627 628 629 630 631
	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
		res = -EINVAL;
		goto exit;
	}
	if (addr->family != AF_TIPC) {
		res = -EAFNOSUPPORT;
		goto exit;
	}
P
Per Liden 已提交
632 633 634

	if (addr->addrtype == TIPC_ADDR_NAME)
		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
635 636 637 638
	else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
		res = -EAFNOSUPPORT;
		goto exit;
	}
639

640
	if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
641
	    (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
642 643 644 645
	    (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
		res = -EACCES;
		goto exit;
	}
646

J
Jon Maloy 已提交
647
	res = (addr->scope >= 0) ?
648 649
		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
650 651 652
exit:
	release_sock(sk);
	return res;
P
Per Liden 已提交
653 654
}

655
/**
656
 * tipc_getname - get port ID of socket or peer socket
P
Per Liden 已提交
657 658 659
 * @sock: socket structure
 * @uaddr: area for returned socket address
 * @uaddr_len: area for returned length of socket address
660
 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
661
 *
P
Per Liden 已提交
662
 * Returns 0 on success, errno otherwise
663
 *
664 665
 * NOTE: This routine doesn't need to take the socket lock since it only
 *       accesses socket information that is unchanging (or which changes in
666
 *       a completely predictable manner).
P
Per Liden 已提交
667
 */
668
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
669
			int peer)
P
Per Liden 已提交
670 671
{
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
672 673
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
P
Per Liden 已提交
674

675
	memset(addr, 0, sizeof(*addr));
676
	if (peer) {
677
		if ((!tipc_sk_connected(sk)) &&
678
		    ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
679
			return -ENOTCONN;
680 681
		addr->addr.id.ref = tsk_peer_port(tsk);
		addr->addr.id.node = tsk_peer_node(tsk);
682
	} else {
683
		addr->addr.id.ref = tsk->portid;
684
		addr->addr.id.node = tipc_own_addr(sock_net(sk));
685
	}
P
Per Liden 已提交
686 687 688 689 690 691

	addr->addrtype = TIPC_ADDR_ID;
	addr->family = AF_TIPC;
	addr->scope = 0;
	addr->addr.name.domain = 0;

692
	return sizeof(*addr);
P
Per Liden 已提交
693 694 695
}

/**
 * tipc_poll - read and possibly block on pollmask
 * @file: file structure associated with the socket
 * @sock: socket for which to calculate the poll bits
 * @wait: poll table, handed on to sock_poll_wait()
 *
 * Returns pollmask value
 *
 * COMMENTARY:
 * It appears that the usual socket locking mechanisms are not useful here
 * since the pollmask info is potentially out-of-date the moment this routine
 * exits.  TCP and other protocols seem to rely on higher level poll routines
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 * imply that the operation will succeed, merely that it should be performed
 * and will not block.
 */
static __poll_t tipc_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	__poll_t revents = 0;

	sock_poll_wait(file, wait);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		revents |= EPOLLHUP;

	switch (sk->sk_state) {
	case TIPC_ESTABLISHED:
	case TIPC_CONNECTING:
		/* Writable unless a link or the connection is congested */
		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
			revents |= EPOLLOUT;
		/* fall thru' */
	case TIPC_LISTEN:
		if (!skb_queue_empty(&sk->sk_receive_queue))
			revents |= EPOLLIN | EPOLLRDNORM;
		break;
	case TIPC_OPEN:
		if (tsk->group_is_open && !tsk->cong_link_cnt)
			revents |= EPOLLOUT;
		/* Only connectionless sockets are readable in this state */
		if (!tipc_sk_type_connectionless(sk))
			break;
		if (skb_queue_empty(&sk->sk_receive_queue))
			break;
		revents |= EPOLLIN | EPOLLRDNORM;
		break;
	case TIPC_DISCONNECTING:
		/* Replaces, rather than extends, the bits gathered above */
		revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP;
		break;
	}
	return revents;
}

753 754 755 756
/**
 * tipc_sendmcast - send multicast message
 * @sock: socket structure
 * @seq: destination address
 * @msg: message to send
 * @dlen: length of data to send
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno:
 * -EACCES if the socket is member of a group, -EHOSTUNREACH if the name
 * sequence resolves to no destination, otherwise the error reported by
 * the wait, build or transmit step.
 */
static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
			  struct msghdr *msg, size_t dlen, long timeout)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	int mtu = tipc_bcast_get_mtu(net);
	struct tipc_mc_method *method = &tsk->mc_method;
	struct sk_buff_head pkts;
	struct tipc_nlist dsts;
	int rc;

	if (tsk->group)
		return -EACCES;

	/* Block or return if any destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;

	/* Lookup destination nodes */
	tipc_nlist_init(&dsts, tipc_own_addr(net));
	tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
				      seq->upper, &dsts);
	if (!dsts.local && !dsts.remote)
		return -EHOSTUNREACH;

	/* Build message header */
	msg_set_type(hdr, TIPC_MCAST_MSG);
	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_nametype(hdr, seq->type);
	msg_set_namelower(hdr, seq->lower);
	msg_set_nameupper(hdr, seq->upper);

	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);

	/* Send message if build was successful; this is the expected path,
	 * so it is hinted as likely
	 */
	if (likely(rc == dlen))
		rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
				     &tsk->cong_link_cnt);

	tipc_nlist_purge(&dsts);

	return rc ? rc : dlen;
}

816 817 818 819 820 821 822 823 824 825 826 827 828
/**
 * tipc_send_group_msg - send a message to a member in the group
 * @net: network namespace
 * @tsk: sending socket
 * @m: message to send
 * @mb: group member
 * @dnode: destination node
 * @dport: destination port
 * @dlen: total length of message data
 *
 * Returns @dlen once the message has been built and handed to the node
 * layer, otherwise the result of the failed tipc_msg_build(). A transmit
 * result of -ELINKCONG only records the link as congested.
 */
static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
			       struct msghdr *m, struct tipc_member *mb,
			       u32 dnode, u32 dport, int dlen)
{
	u16 bc_seqno = tipc_group_bc_snd_nxt(tsk->group);
	int blocks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_msg *hdr = &tsk->phdr;
	struct sk_buff_head xmitq;
	int mtu;
	int rc;

	/* Complete message header */
	msg_set_type(hdr, TIPC_GRP_UCAST_MSG);
	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
	msg_set_destport(hdr, dport);
	msg_set_destnode(hdr, dnode);
	msg_set_grp_bc_seqno(hdr, bc_seqno);

	/* Build message as chain of buffers */
	skb_queue_head_init(&xmitq);
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &xmitq);
	if (unlikely(rc != dlen))
		return rc;

	/* Send message; remember the link if it turned out congested */
	rc = tipc_node_xmit(net, &xmitq, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
		tipc_dest_push(&tsk->cong_links, dnode, 0);
		tsk->cong_link_cnt++;
	}

	/* Update send window */
	tipc_group_update_member(mb, blocks);

	/* A broadcast sent within next EXPIRE period must follow same path */
	tsk->mc_method.rcast = true;
	tsk->mc_method.mandatory = true;

	return dlen;
}

/**
 * tipc_send_group_unicast - send message to a member in the group
 * @sock: socket structure
 * @m: message to send; m->msg_name holds the destination port identity
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
				   int dlen, long timeout)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	int blocks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct net *net = sock_net(sk);
	struct tipc_member *member = NULL;
	u32 dnode = dest->addr.id.node;
	u32 dport = dest->addr.id.ref;
	int rc;

	/* An all-zero port identity addresses nobody */
	if (!dport && !dnode)
		return -EHOSTUNREACH;

	/* Block or return if destination link or member is congested */
	rc = tipc_wait_for_cond(sock, &timeout,
				!tipc_dest_find(&tsk->cong_links, dnode, 0) &&
				!tipc_group_cong(grp, dnode, dport, blocks,
						 &member));
	if (unlikely(rc))
		return rc;

	/* No member was reported for this destination */
	if (unlikely(!member))
		return -EHOSTUNREACH;

	rc = tipc_send_group_msg(net, tsk, m, member, dnode, dport, dlen);
	if (rc)
		return rc;

	return dlen;
}

909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927
/**
 * tipc_send_group_anycast - send message to any member with given identity
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
				   int dlen, long timeout)
{
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct list_head *cong_links = &tsk->cong_links;
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_group *grp = tsk->group;
928
	struct tipc_msg *hdr = &tsk->phdr;
929 930 931 932 933
	struct tipc_member *first = NULL;
	struct tipc_member *mbr = NULL;
	struct net *net = sock_net(sk);
	u32 node, port, exclude;
	struct list_head dsts;
934
	u32 type, inst, scope;
935 936 937 938 939 940
	int lookups = 0;
	int dstcnt, rc;
	bool cong;

	INIT_LIST_HEAD(&dsts);

941
	type = msg_nametype(hdr);
942
	inst = dest->addr.name.name.instance;
943
	scope = msg_lookup_scope(hdr);
944 945 946 947 948 949 950
	exclude = tipc_group_exclude(grp);

	while (++lookups < 4) {
		first = NULL;

		/* Look for a non-congested destination member, if any */
		while (1) {
951
			if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991
						 &dstcnt, exclude, false))
				return -EHOSTUNREACH;
			tipc_dest_pop(&dsts, &node, &port);
			cong = tipc_group_cong(grp, node, port, blks, &mbr);
			if (!cong)
				break;
			if (mbr == first)
				break;
			if (!first)
				first = mbr;
		}

		/* Start over if destination was not in member list */
		if (unlikely(!mbr))
			continue;

		if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
			break;

		/* Block or return if destination link or member is congested */
		rc = tipc_wait_for_cond(sock, &timeout,
					!tipc_dest_find(cong_links, node, 0) &&
					!tipc_group_cong(grp, node, port,
							 blks, &mbr));
		if (unlikely(rc))
			return rc;

		/* Send, unless destination disappeared while waiting */
		if (likely(mbr))
			break;
	}

	if (unlikely(lookups >= 4))
		return -EHOSTUNREACH;

	rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);

	return rc ? rc : dlen;
}

/**
 * tipc_send_group_bcast - send message to all members in communication group
 * @sk: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
				 int dlen, long timeout)
{
1005
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
J
Jon Maloy 已提交
1006 1007 1008 1009 1010 1011
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_nlist *dsts = tipc_group_dests(grp);
	struct tipc_mc_method *method = &tsk->mc_method;
1012
	bool ack = method->mandatory && method->rcast;
1013
	int blks = tsk_blocks(MCAST_H_SIZE + dlen);
J
Jon Maloy 已提交
1014 1015 1016 1017 1018 1019 1020 1021
	struct tipc_msg *hdr = &tsk->phdr;
	int mtu = tipc_bcast_get_mtu(net);
	struct sk_buff_head pkts;
	int rc = -EHOSTUNREACH;

	if (!dsts->local && !dsts->remote)
		return -EHOSTUNREACH;

1022 1023 1024
	/* Block or return if any destination link or member is congested */
	rc = tipc_wait_for_cond(sock, &timeout,	!tsk->cong_link_cnt &&
				!tipc_group_bc_cong(grp, blks));
J
Jon Maloy 已提交
1025 1026 1027 1028
	if (unlikely(rc))
		return rc;

	/* Complete message header */
1029 1030 1031 1032 1033 1034 1035
	if (dest) {
		msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
		msg_set_nameinst(hdr, dest->addr.name.name.instance);
	} else {
		msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
		msg_set_nameinst(hdr, 0);
	}
1036
	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
J
Jon Maloy 已提交
1037 1038 1039 1040
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));

1041 1042 1043
	/* Avoid getting stuck with repeated forced replicasts */
	msg_set_grp_bc_ack_req(hdr, ack);

J
Jon Maloy 已提交
1044 1045 1046 1047 1048 1049 1050
	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	/* Send message */
1051
	rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
J
Jon Maloy 已提交
1052 1053 1054
	if (unlikely(rc))
		return rc;

1055
	/* Update broadcast sequence number and send windows */
1056 1057 1058 1059 1060 1061
	tipc_group_update_bc_members(tsk->group, blks, ack);

	/* Broadcast link is now free to choose method for next broadcast */
	method->mandatory = false;
	method->expires = jiffies;

J
Jon Maloy 已提交
1062 1063 1064
	return dlen;
}

/**
 * tipc_send_group_mcast - send message to all members with given identity
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
				 int dlen, long timeout)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
1082
	struct tipc_msg *hdr = &tsk->phdr;
1083
	struct net *net = sock_net(sk);
1084
	u32 type, inst, scope, exclude;
1085
	struct list_head dsts;
1086
	u32 dstcnt;
1087 1088 1089

	INIT_LIST_HEAD(&dsts);

1090 1091 1092
	type = msg_nametype(hdr);
	inst = dest->addr.name.name.instance;
	scope = msg_lookup_scope(hdr);
1093
	exclude = tipc_group_exclude(grp);
1094 1095 1096

	if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
				 &dstcnt, exclude, true))
1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107
		return -EHOSTUNREACH;

	if (dstcnt == 1) {
		tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
		return tipc_send_group_unicast(sock, m, dlen, timeout);
	}

	tipc_dest_list_purge(&dsts);
	return tipc_send_group_bcast(sock, m, dlen, timeout);
}

/**
 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
 * @arrvq: queue with arriving messages, to be cloned after destination lookup
 * @inputq: queue with cloned messages, delivered to socket after dest lookup
 *
 * Multi-threaded: parallel calls with reference to same queues may occur
1114
 */
1115 1116
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
		       struct sk_buff_head *inputq)
1117
{
J
Jon Maloy 已提交
1118
	u32 self = tipc_own_addr(net);
1119
	u32 type, lower, upper, scope;
1120
	struct sk_buff *skb, *_skb;
1121
	u32 portid, onode;
1122
	struct sk_buff_head tmpq;
J
Jon Maloy 已提交
1123
	struct list_head dports;
1124 1125 1126
	struct tipc_msg *hdr;
	int user, mtyp, hlen;
	bool exact;
1127

1128
	__skb_queue_head_init(&tmpq);
1129
	INIT_LIST_HEAD(&dports);
1130

1131 1132
	skb = tipc_skb_peek(arrvq, &inputq->lock);
	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
1133 1134 1135 1136 1137 1138 1139
		hdr = buf_msg(skb);
		user = msg_user(hdr);
		mtyp = msg_type(hdr);
		hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
		onode = msg_orignode(hdr);
		type = msg_nametype(hdr);

1140 1141 1142 1143 1144 1145
		if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
			spin_lock_bh(&inputq->lock);
			if (skb_peek(arrvq) == skb) {
				__skb_dequeue(arrvq);
				__skb_queue_tail(inputq, skb);
			}
J
Jon Maloy 已提交
1146
			kfree_skb(skb);
1147 1148 1149
			spin_unlock_bh(&inputq->lock);
			continue;
		}
1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165

		/* Group messages require exact scope match */
		if (msg_in_group(hdr)) {
			lower = 0;
			upper = ~0;
			scope = msg_lookup_scope(hdr);
			exact = true;
		} else {
			/* TIPC_NODE_SCOPE means "any scope" in this context */
			if (onode == self)
				scope = TIPC_NODE_SCOPE;
			else
				scope = TIPC_CLUSTER_SCOPE;
			exact = false;
			lower = msg_namelower(hdr);
			upper = msg_nameupper(hdr);
J
Jon Maloy 已提交
1166
		}
1167 1168 1169 1170 1171 1172

		/* Create destination port list: */
		tipc_nametbl_mc_lookup(net, type, lower, upper,
				       scope, exact, &dports);

		/* Clone message per destination */
J
Jon Maloy 已提交
1173
		while (tipc_dest_pop(&dports, NULL, &portid)) {
1174
			_skb = __pskb_copy(skb, hlen, GFP_ATOMIC);
1175 1176 1177 1178 1179 1180
			if (_skb) {
				msg_set_destport(buf_msg(_skb), portid);
				__skb_queue_tail(&tmpq, _skb);
				continue;
			}
			pr_warn("Failed to clone mcast rcv buffer\n");
1181
		}
1182 1183 1184 1185 1186 1187 1188 1189 1190
		/* Append to inputq if not already done by other thread */
		spin_lock_bh(&inputq->lock);
		if (skb_peek(arrvq) == skb) {
			skb_queue_splice_tail_init(&tmpq, inputq);
			kfree_skb(__skb_dequeue(arrvq));
		}
		spin_unlock_bh(&inputq->lock);
		__skb_queue_purge(&tmpq);
		kfree_skb(skb);
1191
	}
1192
	tipc_sk_rcv(net, inputq);
1193 1194
}

/**
J
Jon Maloy 已提交
1196
 * tipc_sk_conn_proto_rcv - receive a connection mng protocol message
1197
 * @tsk: receiving socket
1198
 * @skb: pointer to message buffer.
1199
 */
J
Jon Maloy 已提交
1200 1201
static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
				   struct sk_buff_head *xmitq)
1202
{
1203
	struct tipc_msg *hdr = buf_msg(skb);
J
Jon Maloy 已提交
1204 1205
	u32 onode = tsk_own_node(tsk);
	struct sock *sk = &tsk->sk;
1206
	int mtyp = msg_type(hdr);
1207
	bool conn_cong;
1208

1209
	/* Ignore if connection cannot be validated: */
1210
	if (!tsk_peer_msg(tsk, hdr))
1211 1212
		goto exit;

1213 1214 1215 1216 1217 1218 1219 1220
	if (unlikely(msg_errcode(hdr))) {
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
		tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
				      tsk_peer_port(tsk));
		sk->sk_state_change(sk);
		goto exit;
	}

1221
	tsk->probe_unacked = false;
1222

1223 1224
	if (mtyp == CONN_PROBE) {
		msg_set_type(hdr, CONN_PROBE_REPLY);
J
Jon Paul Maloy 已提交
1225 1226
		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
			__skb_queue_tail(xmitq, skb);
1227 1228
		return;
	} else if (mtyp == CONN_ACK) {
1229
		conn_cong = tsk_conn_cong(tsk);
1230 1231 1232
		tsk->snt_unacked -= msg_conn_ack(hdr);
		if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
			tsk->snd_win = msg_adv_win(hdr);
1233
		if (conn_cong)
1234 1235 1236
			sk->sk_write_space(sk);
	} else if (mtyp != CONN_PROBE_REPLY) {
		pr_warn("Received unknown CONN_PROTO msg\n");
1237 1238
	}
exit:
1239
	kfree_skb(skb);
1240 1241
}

/**
1243
 * tipc_sendmsg - send message in connectionless manner
P
Per Liden 已提交
1244 1245
 * @sock: socket structure
 * @m: message to send
1246
 * @dsz: amount of user data to be sent
1247
 *
P
Per Liden 已提交
1248
 * Message must have an destination specified explicitly.
1249
 * Used for SOCK_RDM and SOCK_DGRAM messages,
P
Per Liden 已提交
1250 1251
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
1252
 *
P
Per Liden 已提交
1253 1254
 * Returns the number of bytes sent on success, or errno otherwise
 */
1255
static int tipc_sendmsg(struct socket *sock,
1256
			struct msghdr *m, size_t dsz)
1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_sendmsg(sock, m, dsz);
	release_sock(sk);

	return ret;
}

/* __tipc_sendmsg - send a connectionless or 'SYN' message
 * @sock: socket structure
 * @m: message to send; m->msg_name optionally holds the destination
 * @dlen: amount of user data to be sent
 *
 * tipc_sendmsg() calls this with the socket lock held.
 *
 * Returns the number of bytes sent on success, or errno otherwise
 */
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct list_head *clinks = &tsk->cong_links;
	bool syn = !tipc_sk_type_connectionless(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq *seq;
	struct sk_buff_head pkts;
	u32 dport, dnode = 0;
	u32 type, inst;
	int mtu, rc;

	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
		return -EMSGSIZE;

	/* Validate the supplied address before any field of it is used */
	if (likely(dest)) {
		if (unlikely(m->msg_namelen < sizeof(*dest)))
			return -EINVAL;
		if (unlikely(dest->family != AF_TIPC))
			return -EINVAL;
	}

	/* Group members dispatch on address type to the group senders */
	if (grp) {
		if (!dest)
			return tipc_send_group_bcast(sock, m, dlen, timeout);
		if (dest->addrtype == TIPC_ADDR_NAME)
			return tipc_send_group_anycast(sock, m, dlen, timeout);
		if (dest->addrtype == TIPC_ADDR_ID)
			return tipc_send_group_unicast(sock, m, dlen, timeout);
		if (dest->addrtype == TIPC_ADDR_MCAST)
			return tipc_send_group_mcast(sock, m, dlen, timeout);
		return -EINVAL;
	}

	/* Without an explicit destination, fall back to the stored peer
	 * address; accepted for connection-oriented sockets only.
	 */
	if (unlikely(!dest)) {
		dest = &tsk->peer;
		if (!syn || dest->family != AF_TIPC)
			return -EDESTADDRREQ;
	}

	if (unlikely(syn)) {
		if (sk->sk_state == TIPC_LISTEN)
			return -EPIPE;
		if (sk->sk_state != TIPC_OPEN)
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
		if (dest->addrtype == TIPC_ADDR_NAME) {
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
		}
		msg_set_syn(hdr, 1);
	}

	seq = &dest->addr.nameseq;
	if (dest->addrtype == TIPC_ADDR_MCAST)
		return tipc_sendmcast(sock, seq, m, dlen, timeout);

	if (dest->addrtype == TIPC_ADDR_NAME) {
		type = dest->addr.name.name.type;
		inst = dest->addr.name.name.instance;
		dnode = dest->addr.name.domain;
		msg_set_type(hdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
		msg_set_nametype(hdr, type);
		msg_set_nameinst(hdr, inst);
		msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
		dport = tipc_nametbl_translate(net, type, inst, &dnode);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dport);
		if (unlikely(!dport && !dnode))
			return -EHOSTUNREACH;
	} else if (dest->addrtype == TIPC_ADDR_ID) {
		dnode = dest->addr.id.node;
		msg_set_type(hdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(hdr, 0);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dest->addr.id.ref);
		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
	} else {
		return -EINVAL;
	}

	/* Block or return if destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout,
				!tipc_dest_find(clinks, dnode, 0));
	if (unlikely(rc))
		return rc;

	skb_queue_head_init(&pkts);
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	/* A 'SYN' is also cloned into the socket write queue. If cloning
	 * fails the built chain is still held in pkts, so free it here
	 * rather than leaking it.
	 * NOTE(review): assumes tipc_msg_skb_clone() (defined outside this
	 * view) leaves the source queue untouched on failure - confirm.
	 */
	if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue))) {
		__skb_queue_purge(&pkts);
		return -ENOMEM;
	}

	/* Link congestion is recorded on the socket and not reported to
	 * the caller as an error.
	 */
	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
		tipc_dest_push(clinks, dnode, 0);
		tsk->cong_link_cnt++;
		rc = 0;
	}

	if (unlikely(syn && !rc))
		tipc_set_sk_state(sk, TIPC_CONNECTING);

	return rc ? rc : dlen;
}

/**
1384
 * tipc_sendstream - send stream-oriented data
P
Per Liden 已提交
1385
 * @sock: socket structure
1386 1387
 * @m: data to send
 * @dsz: total length of data to be transmitted
1388
 *
1389
 * Used for SOCK_STREAM data.
1390
 *
1391 1392
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
P
Per Liden 已提交
1393
 */
1394
static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
1395 1396 1397 1398 1399
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
1400
	ret = __tipc_sendstream(sock, m, dsz);
1401 1402 1403 1404 1405
	release_sock(sk);

	return ret;
}

/* __tipc_sendstream - send stream-oriented data
 * @sock: socket structure
 * @m: data to send; a non-NULL m->msg_name requests implicit connection setup
 * @dlen: total length of data to be transmitted
 *
 * tipc_sendstream() calls this with the socket lock held.
 *
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
 */
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	struct sk_buff_head pkts;
	u32 dnode = tsk_peer_node(tsk);
	int send, sent = 0;
	int rc = 0;

	skb_queue_head_init(&pkts);

	/* The byte counters below are ints */
	if (unlikely(dlen > INT_MAX))
		return -EMSGSIZE;

	/* Handle implicit connection setup */
	if (unlikely(dest)) {
		rc = __tipc_sendmsg(sock, m, dlen);
		if (dlen && (dlen == rc))
			tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
		return rc;
	}

	/* Send in chunks of at most TIPC_MAX_USER_MSG_SIZE bytes */
	do {
		rc = tipc_wait_for_cond(sock, &timeout,
					(!tsk->cong_link_cnt &&
					 !tsk_conn_cong(tsk) &&
					 tipc_sk_connected(sk)));
		if (unlikely(rc))
			break;

		send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
		rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
		if (unlikely(rc != send))
			break;

		/* Link congestion is recorded and the chunk still counts as
		 * sent; the wait at the top of the loop then blocks on it.
		 */
		rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
		if (unlikely(rc == -ELINKCONG)) {
			tsk->cong_link_cnt = 1;
			rc = 0;
		}
		if (likely(!rc)) {
			tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
			sent += send;
		}
	} while (sent < dlen && !rc);

	/* A partial send is reported as success */
	return sent ? sent : rc;
}

/**
1460
 * tipc_send_packet - send a connection-oriented message
P
Per Liden 已提交
1461
 * @sock: socket structure
1462 1463
 * @m: message to send
 * @dsz: length of data to be transmitted
1464
 *
1465
 * Used for SOCK_SEQPACKET messages.
1466
 *
1467
 * Returns the number of bytes sent on success, or errno otherwise
P
Per Liden 已提交
1468
 */
1469
static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
P
Per Liden 已提交
1470
{
1471 1472
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;
P
Per Liden 已提交
1473

1474
	return tipc_sendstream(sock, m, dsz);
P
Per Liden 已提交
1475 1476
}

/* tipc_sk_finish_conn - complete the setup of a connection
P
Per Liden 已提交
1478
 */
1479
static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1480
				u32 peer_node)
P
Per Liden 已提交
1481
{
1482 1483
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
1484
	struct tipc_msg *msg = &tsk->phdr;
P
Per Liden 已提交
1485

1486
	msg_set_syn(msg, 0);
1487 1488 1489 1490 1491
	msg_set_destnode(msg, peer_node);
	msg_set_destport(msg, peer_port);
	msg_set_type(msg, TIPC_CONN_MSG);
	msg_set_lookup_scope(msg, 0);
	msg_set_hdr_sz(msg, SHORT_H_SIZE);
1492

1493
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
1494
	tipc_set_sk_state(sk, TIPC_ESTABLISHED);
1495 1496
	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
1497
	tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
1498
	__skb_queue_purge(&sk->sk_write_queue);
1499 1500 1501 1502 1503 1504
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
		return;

	/* Fall back to message based flow control */
	tsk->rcv_win = FLOWCTL_MSG_WIN;
	tsk->snd_win = FLOWCTL_MSG_WIN;
P
Per Liden 已提交
1505 1506 1507
}

/**
1508
 * tipc_sk_set_orig_addr - capture sender's address for received message
P
Per Liden 已提交
1509
 * @m: descriptor for message info
1510
 * @hdr: received message header
1511
 *
P
Per Liden 已提交
1512 1513
 * Note: Address is not captured if not requested by receiver.
 */
1514
static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
P
Per Liden 已提交
1515
{
1516 1517 1518 1519 1520 1521 1522 1523
	DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name);
	struct tipc_msg *hdr = buf_msg(skb);

	if (!srcaddr)
		return;

	srcaddr->sock.family = AF_TIPC;
	srcaddr->sock.addrtype = TIPC_ADDR_ID;
1524
	srcaddr->sock.scope = 0;
1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535
	srcaddr->sock.addr.id.ref = msg_origport(hdr);
	srcaddr->sock.addr.id.node = msg_orignode(hdr);
	srcaddr->sock.addr.name.domain = 0;
	m->msg_namelen = sizeof(struct sockaddr_tipc);

	if (!msg_in_group(hdr))
		return;

	/* Group message users may also want to know sending member's id */
	srcaddr->member.family = AF_TIPC;
	srcaddr->member.addrtype = TIPC_ADDR_NAME;
1536
	srcaddr->member.scope = 0;
1537 1538 1539 1540
	srcaddr->member.addr.name.name.type = msg_nametype(hdr);
	srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
	srcaddr->member.addr.name.domain = 0;
	m->msg_namelen = sizeof(*srcaddr);
P
Per Liden 已提交
1541 1542 1543
}

/**
1544
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
P
Per Liden 已提交
1545 1546
 * @m: descriptor for message info
 * @msg: received message header
1547
 * @tsk: TIPC port associated with message
1548
 *
P
Per Liden 已提交
1549
 * Note: Ancillary data is not captured if not requested by receiver.
1550
 *
P
Per Liden 已提交
1551 1552
 * Returns 0 if successful, otherwise errno
 */
1553 1554
static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
				 struct tipc_sock *tsk)
P
Per Liden 已提交
1555 1556 1557 1558
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
1559
	int has_name;
P
Per Liden 已提交
1560 1561 1562 1563 1564 1565 1566 1567 1568 1569
	int res;

	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
1570 1571
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
P
Per Liden 已提交
1572
			return res;
1573 1574 1575 1576 1577 1578
		if (anc_data[1]) {
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
P
Per Liden 已提交
1579 1580 1581 1582 1583 1584
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
1585
		has_name = 1;
P
Per Liden 已提交
1586 1587 1588 1589 1590
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
1591
		has_name = 1;
P
Per Liden 已提交
1592 1593 1594 1595 1596
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
1597 1598 1599 1600
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
P
Per Liden 已提交
1601 1602
		break;
	default:
1603
		has_name = 0;
P
Per Liden 已提交
1604
	}
1605 1606 1607 1608 1609
	if (has_name) {
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}
P
Per Liden 已提交
1610 1611 1612 1613

	return 0;
}

static void tipc_sk_send_ack(struct tipc_sock *tsk)
1615
{
1616 1617
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
1618
	struct sk_buff *skb = NULL;
1619
	struct tipc_msg *msg;
1620 1621
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);
1622

1623
	if (!tipc_sk_connected(sk))
1624
		return;
1625 1626 1627
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
1628
	if (!skb)
1629
		return;
1630
	msg = buf_msg(skb);
1631 1632 1633 1634 1635 1636 1637 1638
	msg_set_conn_ack(msg, tsk->rcv_unacked);
	tsk->rcv_unacked = 0;

	/* Adjust to and advertize the correct window limit */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
		tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
		msg_set_adv_win(msg, tsk->rcv_win);
	}
1639
	tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
1640 1641
}

static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
Y
Ying Xue 已提交
1643 1644 1645
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
1646
	long timeo = *timeop;
1647 1648 1649 1650
	int err = sock_error(sk);

	if (err)
		return err;
Y
Ying Xue 已提交
1651 1652 1653

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1654
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
1655
			if (sk->sk_shutdown & RCV_SHUTDOWN) {
Y
Ying Xue 已提交
1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668
				err = -ENOTCONN;
				break;
			}
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
1669 1670 1671
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
1672 1673 1674 1675

		err = sock_error(sk);
		if (err)
			break;
Y
Ying Xue 已提交
1676 1677
	}
	finish_wait(sk_sleep(sk), &wait);
1678
	*timeop = timeo;
Y
Ying Xue 已提交
1679 1680 1681
	return err;
}

/**
1683
 * tipc_recvmsg - receive packet-oriented message
P
Per Liden 已提交
1684
 * @m: descriptor for message info
1685
 * @buflen: length of user buffer area
P
Per Liden 已提交
1686
 * @flags: receive flags
1687
 *
P
Per Liden 已提交
1688 1689 1690 1691 1692
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
1693 1694
static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
			size_t buflen,	int flags)
P
Per Liden 已提交
1695
{
1696
	struct sock *sk = sock->sk;
1697
	bool connected = !tipc_sk_type_connectionless(sk);
1698
	struct tipc_sock *tsk = tipc_sk(sk);
1699
	int rc, err, hlen, dlen, copy;
1700
	struct sk_buff_head xmitq;
1701 1702 1703
	struct tipc_msg *hdr;
	struct sk_buff *skb;
	bool grp_evt;
1704
	long timeout;
P
Per Liden 已提交
1705

1706
	/* Catch invalid receive requests */
1707
	if (unlikely(!buflen))
P
Per Liden 已提交
1708 1709
		return -EINVAL;

1710
	lock_sock(sk);
1711 1712
	if (unlikely(connected && sk->sk_state == TIPC_OPEN)) {
		rc = -ENOTCONN;
P
Per Liden 已提交
1713 1714
		goto exit;
	}
1715
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
P
Per Liden 已提交
1716

1717
	/* Step rcv queue to first msg with data or error; wait if necessary */
1718 1719 1720 1721 1722 1723 1724 1725 1726
	do {
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			goto exit;
		skb = skb_peek(&sk->sk_receive_queue);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);
1727
		grp_evt = msg_is_grp_evt(hdr);
1728 1729
		if (likely(dlen || err))
			break;
1730
		tsk_advance_rx_queue(sk);
1731
	} while (1);
P
Per Liden 已提交
1732

1733
	/* Collect msg meta data, including error code and rejected data */
1734
	tipc_sk_set_orig_addr(m, skb);
1735 1736
	rc = tipc_sk_anc_data_recv(m, hdr, tsk);
	if (unlikely(rc))
P
Per Liden 已提交
1737 1738
		goto exit;

1739 1740 1741 1742
	/* Capture data if non-error msg, otherwise just set return value */
	if (likely(!err)) {
		copy = min_t(int, dlen, buflen);
		if (unlikely(copy != dlen))
P
Per Liden 已提交
1743
			m->msg_flags |= MSG_TRUNC;
1744
		rc = skb_copy_datagram_msg(skb, hlen, m, copy);
P
Per Liden 已提交
1745
	} else {
1746 1747 1748 1749
		copy = 0;
		rc = 0;
		if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
			rc = -ECONNRESET;
P
Per Liden 已提交
1750
	}
1751 1752
	if (unlikely(rc))
		goto exit;
P
Per Liden 已提交
1753

1754 1755 1756 1757 1758 1759 1760 1761
	/* Mark message as group event if applicable */
	if (unlikely(grp_evt)) {
		if (msg_grp_evt(hdr) == TIPC_WITHDRAWN)
			m->msg_flags |= MSG_EOR;
		m->msg_flags |= MSG_OOB;
		copy = 0;
	}

1762
	/* Caption of data or error code/rejected data was successful */
1763 1764 1765
	if (unlikely(flags & MSG_PEEK))
		goto exit;

1766 1767 1768 1769 1770 1771 1772 1773 1774
	/* Send group flow control advertisement when applicable */
	if (tsk->group && msg_in_group(hdr) && !grp_evt) {
		skb_queue_head_init(&xmitq);
		tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
					  msg_orignode(hdr), msg_origport(hdr),
					  &xmitq);
		tipc_node_distr_xmit(sock_net(sk), &xmitq);
	}

1775
	tsk_advance_rx_queue(sk);
1776

1777 1778 1779
	if (likely(!connected))
		goto exit;

1780
	/* Send connection flow control advertisement when applicable */
1781 1782 1783
	tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
	if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
		tipc_sk_send_ack(tsk);
P
Per Liden 已提交
1784
exit:
1785
	release_sock(sk);
1786
	return rc ? rc : copy;
P
Per Liden 已提交
1787 1788
}

/**
1790
 * tipc_recvstream - receive stream-oriented data
P
Per Liden 已提交
1791
 * @m: descriptor for message info
1792
 * @buflen: total size of user buffer area
P
Per Liden 已提交
1793
 * @flags: receive flags
1794 1795
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
P
Per Liden 已提交
1796 1797 1798 1799
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
1800 1801
static int tipc_recvstream(struct socket *sock, struct msghdr *m,
			   size_t buflen, int flags)
P
Per Liden 已提交
1802
{
1803
	struct sock *sk = sock->sk;
1804
	struct tipc_sock *tsk = tipc_sk(sk);
1805 1806 1807 1808 1809 1810 1811
	struct sk_buff *skb;
	struct tipc_msg *hdr;
	struct tipc_skb_cb *skb_cb;
	bool peek = flags & MSG_PEEK;
	int offset, required, copy, copied = 0;
	int hlen, dlen, err, rc;
	long timeout;
P
Per Liden 已提交
1812

1813
	/* Catch invalid receive attempts */
1814
	if (unlikely(!buflen))
P
Per Liden 已提交
1815 1816
		return -EINVAL;

1817
	lock_sock(sk);
P
Per Liden 已提交
1818

1819
	if (unlikely(sk->sk_state == TIPC_OPEN)) {
1820
		rc = -ENOTCONN;
Y
Ying Xue 已提交
1821
		goto exit;
P
Per Liden 已提交
1822
	}
1823 1824
	required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen);
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
P
Per Liden 已提交
1825

1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836
	do {
		/* Look at first msg in receive queue; wait if necessary */
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			break;
		skb = skb_peek(&sk->sk_receive_queue);
		skb_cb = TIPC_SKB_CB(skb);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);
1837

1838 1839 1840 1841 1842
		/* Discard any empty non-errored (SYN-) message */
		if (unlikely(!dlen && !err)) {
			tsk_advance_rx_queue(sk);
			continue;
		}
1843

1844 1845
		/* Collect msg meta data, incl. error code and rejected data */
		if (!copied) {
1846
			tipc_sk_set_orig_addr(m, skb);
1847 1848 1849 1850
			rc = tipc_sk_anc_data_recv(m, hdr, tsk);
			if (rc)
				break;
		}
P
Per Liden 已提交
1851

1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871
		/* Copy data if msg ok, otherwise return error/partial data */
		if (likely(!err)) {
			offset = skb_cb->bytes_read;
			copy = min_t(int, dlen - offset, buflen - copied);
			rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
			if (unlikely(rc))
				break;
			copied += copy;
			offset += copy;
			if (unlikely(offset < dlen)) {
				if (!peek)
					skb_cb->bytes_read = offset;
				break;
			}
		} else {
			rc = 0;
			if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control)
				rc = -ECONNRESET;
			if (copied || rc)
				break;
P
Per Liden 已提交
1872 1873
		}

1874 1875
		if (unlikely(peek))
			break;
P
Per Liden 已提交
1876

1877
		tsk_advance_rx_queue(sk);
1878

1879 1880 1881 1882
		/* Send connection flow control advertisement when applicable */
		tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
		if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE))
			tipc_sk_send_ack(tsk);
P
Per Liden 已提交
1883

1884 1885 1886
		/* Exit if all requested data or FIN/error received */
		if (copied == buflen || err)
			break;
P
Per Liden 已提交
1887

1888
	} while (!skb_queue_empty(&sk->sk_receive_queue) || copied < required);
P
Per Liden 已提交
1889
exit:
1890
	release_sock(sk);
1891
	return copied ? copied : rc;
P
Per Liden 已提交
1892 1893
}

/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
H
Herbert Xu 已提交
1904
	if (skwq_has_sleeper(wq))
1905 1906
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
						EPOLLWRNORM | EPOLLWRBAND);
1907 1908 1909 1910 1911 1912 1913 1914
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 * @len: the length of messages
 */
1915
static void tipc_data_ready(struct sock *sk)
1916 1917 1918 1919 1920
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
H
Herbert Xu 已提交
1921
	if (skwq_has_sleeper(wq))
1922 1923
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
						EPOLLRDNORM | EPOLLRDBAND);
1924 1925 1926
	rcu_read_unlock();
}

1927 1928 1929 1930 1931
/* tipc_sock_destruct - purge whatever is still on the socket's receive queue */
static void tipc_sock_destruct(struct sock *sk)
{
	struct sk_buff_head *rxq = &sk->sk_receive_queue;

	__skb_queue_purge(rxq);
}

J
Jon Maloy 已提交
1932 1933 1934 1935 1936 1937 1938
static void tipc_sk_proto_rcv(struct sock *sk,
			      struct sk_buff_head *inputq,
			      struct sk_buff_head *xmitq)
{
	struct sk_buff *skb = __skb_dequeue(inputq);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);
J
Jon Maloy 已提交
1939
	struct tipc_group *grp = tsk->group;
1940
	bool wakeup = false;
J
Jon Maloy 已提交
1941 1942 1943 1944 1945 1946

	switch (msg_user(hdr)) {
	case CONN_MANAGER:
		tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
		return;
	case SOCK_WAKEUP:
J
Jon Maloy 已提交
1947
		tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
J
Jon Maloy 已提交
1948
		tsk->cong_link_cnt--;
1949
		wakeup = true;
J
Jon Maloy 已提交
1950
		break;
J
Jon Maloy 已提交
1951
	case GROUP_PROTOCOL:
1952
		tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
J
Jon Maloy 已提交
1953
		break;
J
Jon Maloy 已提交
1954
	case TOP_SRV:
1955
		tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
1956
				      hdr, inputq, xmitq);
J
Jon Maloy 已提交
1957 1958 1959 1960 1961
		break;
	default:
		break;
	}

1962 1963 1964
	if (wakeup)
		sk->sk_write_space(sk);

J
Jon Maloy 已提交
1965 1966 1967
	kfree_skb(skb);
}

1968
/**
1969
 * tipc_sk_filter_connect - check incoming message for a connection-based socket
1970
 * @tsk: TIPC socket
1971 1972
 * @skb: pointer to message buffer.
 * Returns true if message should be added to receive queue, false otherwise
1973
 */
J
Jon Maloy 已提交
1974
static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
1975
{
1976
	struct sock *sk = &tsk->sk;
1977
	struct net *net = sock_net(sk);
1978
	struct tipc_msg *hdr = buf_msg(skb);
1979 1980 1981 1982 1983 1984
	bool con_msg = msg_connected(hdr);
	u32 pport = tsk_peer_port(tsk);
	u32 pnode = tsk_peer_node(tsk);
	u32 oport = msg_origport(hdr);
	u32 onode = msg_orignode(hdr);
	int err = msg_errcode(hdr);
1985
	unsigned long delay;
1986

1987 1988
	if (unlikely(msg_mcast(hdr)))
		return false;
1989

1990 1991
	switch (sk->sk_state) {
	case TIPC_CONNECTING:
1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004
		/* Setup ACK */
		if (likely(con_msg)) {
			if (err)
				break;
			tipc_sk_finish_conn(tsk, oport, onode);
			msg_set_importance(&tsk->phdr, msg_importance(hdr));
			/* ACK+ message with data is added to receive queue */
			if (msg_data_sz(hdr))
				return true;
			/* Empty ACK-, - wake up sleeping connect() and drop */
			sk->sk_data_ready(sk);
			msg_set_dest_droppable(hdr, 1);
			return false;
2005
		}
2006 2007 2008
		/* Ignore connectionless message if not from listening socket */
		if (oport != pport || onode != pnode)
			return false;
2009

2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021
		/* Rejected SYN */
		if (err != TIPC_ERR_OVERLOAD)
			break;

		/* Prepare for new setup attempt if we have a SYN clone */
		if (skb_queue_empty(&sk->sk_write_queue))
			break;
		get_random_bytes(&delay, 2);
		delay %= (tsk->conn_timeout / 4);
		delay = msecs_to_jiffies(delay + 100);
		sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);
		return false;
2022
	case TIPC_OPEN:
2023
	case TIPC_DISCONNECTING:
2024
		return false;
2025
	case TIPC_LISTEN:
2026
		/* Accept only SYN message */
2027 2028 2029
		if (!msg_is_syn(hdr) &&
		    tipc_node_get_capabilities(net, onode) & TIPC_SYN_BIT)
			return false;
2030
		if (!con_msg && !err)
2031
			return true;
2032
		return false;
2033 2034
	case TIPC_ESTABLISHED:
		/* Accept only connection-based messages sent by peer */
2035 2036 2037
		if (likely(con_msg && !err && pport == oport && pnode == onode))
			return true;
		if (!tsk_peer_msg(tsk, hdr))
2038
			return false;
2039 2040 2041 2042 2043
		if (!err)
			return true;
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
		tipc_node_remove_conn(net, pnode, tsk->portid);
		sk->sk_state_change(sk);
2044
		return true;
2045
	default:
2046
		pr_err("Unknown sk_state %u\n", sk->sk_state);
2047
	}
2048 2049 2050 2051 2052
	/* Abort connection setup attempt */
	tipc_set_sk_state(sk, TIPC_DISCONNECTING);
	sk->sk_err = ECONNREFUSED;
	sk->sk_state_change(sk);
	return true;
2053 2054
}

2055 2056 2057
/**
 * rcvbuf_limit - get proper overload limit of socket receive queue
 * @sk: socket
2058
 * @skb: message
2059
 *
2060 2061
 * For connection oriented messages, irrespective of importance,
 * default queue limit is 2 MB.
2062
 *
2063 2064
 * For connectionless messages, queue limits are based on message
 * importance as follows:
2065
 *
2066 2067 2068 2069
 * TIPC_LOW_IMPORTANCE       (2 MB)
 * TIPC_MEDIUM_IMPORTANCE    (4 MB)
 * TIPC_HIGH_IMPORTANCE      (8 MB)
 * TIPC_CRITICAL_IMPORTANCE  (16 MB)
2070 2071 2072
 *
 * Returns overload limit according to corresponding message importance
 */
2073
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
2074
{
2075 2076 2077
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);

2078 2079 2080
	if (unlikely(msg_in_group(hdr)))
		return sk->sk_rcvbuf;

2081 2082
	if (unlikely(!msg_connected(hdr)))
		return sk->sk_rcvbuf << msg_importance(hdr);
2083

2084 2085
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return sk->sk_rcvbuf;
2086

2087
	return FLOWCTL_MSG_LIM;
2088 2089
}

2090
/**
J
Jon Maloy 已提交
2091
 * tipc_sk_filter_rcv - validate incoming message
2092
 * @sk: socket
2093
 * @skb: pointer to message.
2094
 *
2095 2096 2097
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
2098
 * Called with socket lock already taken
2099
 *
P
Per Liden 已提交
2100
 */
J
Jon Maloy 已提交
2101 2102
static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
			       struct sk_buff_head *xmitq)
P
Per Liden 已提交
2103
{
J
Jon Maloy 已提交
2104
	bool sk_conn = !tipc_sk_type_connectionless(sk);
2105
	struct tipc_sock *tsk = tipc_sk(sk);
J
Jon Maloy 已提交
2106
	struct tipc_group *grp = tsk->group;
2107
	struct tipc_msg *hdr = buf_msg(skb);
J
Jon Maloy 已提交
2108 2109 2110
	struct net *net = sock_net(sk);
	struct sk_buff_head inputq;
	int limit, err = TIPC_OK;
2111

J
Jon Maloy 已提交
2112 2113 2114
	TIPC_SKB_CB(skb)->bytes_read = 0;
	__skb_queue_head_init(&inputq);
	__skb_queue_tail(&inputq, skb);
2115

J
Jon Maloy 已提交
2116 2117
	if (unlikely(!msg_isdata(hdr)))
		tipc_sk_proto_rcv(sk, &inputq, xmitq);
2118

J
Jon Maloy 已提交
2119 2120 2121
	if (unlikely(grp))
		tipc_group_filter_msg(grp, &inputq, xmitq);

J
Jon Maloy 已提交
2122 2123 2124 2125 2126
	/* Validate and add to receive buffer if there is space */
	while ((skb = __skb_dequeue(&inputq))) {
		hdr = buf_msg(skb);
		limit = rcvbuf_limit(sk, skb);
		if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
J
Jon Maloy 已提交
2127 2128
		    (!sk_conn && msg_connected(hdr)) ||
		    (!grp && msg_in_group(hdr)))
2129
			err = TIPC_ERR_NO_PORT;
2130 2131
		else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
			atomic_inc(&sk->sk_drops);
J
Jon Maloy 已提交
2132
			err = TIPC_ERR_OVERLOAD;
2133
		}
P
Per Liden 已提交
2134

J
Jon Maloy 已提交
2135 2136 2137 2138 2139 2140 2141 2142
		if (unlikely(err)) {
			tipc_skb_reject(net, err, skb, xmitq);
			err = TIPC_OK;
			continue;
		}
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		skb_set_owner_r(skb, sk);
		sk->sk_data_ready(sk);
2143
	}
2144
}
P
Per Liden 已提交
2145

2146
/**
J
Jon Maloy 已提交
2147
 * tipc_sk_backlog_rcv - handle incoming message from backlog queue
2148
 * @sk: socket
2149
 * @skb: message
2150
 *
2151
 * Caller must hold socket lock
2152
 */
J
Jon Maloy 已提交
2153
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
2154
{
J
Jon Maloy 已提交
2155
	unsigned int before = sk_rmem_alloc_get(sk);
J
Jon Paul Maloy 已提交
2156
	struct sk_buff_head xmitq;
J
Jon Maloy 已提交
2157
	unsigned int added;
2158

J
Jon Paul Maloy 已提交
2159 2160
	__skb_queue_head_init(&xmitq);

J
Jon Maloy 已提交
2161 2162 2163
	tipc_sk_filter_rcv(sk, skb, &xmitq);
	added = sk_rmem_alloc_get(sk) - before;
	atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt);
J
Jon Paul Maloy 已提交
2164

J
Jon Maloy 已提交
2165
	/* Send pending response/rejected messages, if any */
2166
	tipc_node_distr_xmit(sock_net(sk), &xmitq);
2167 2168 2169
	return 0;
}

2170
/**
2171 2172 2173 2174 2175
 * tipc_sk_enqueue - extract all buffers with destination 'dport' from
 *                   inputq and try adding them to socket or backlog queue
 * @inputq: list of incoming buffers with potentially different destinations
 * @sk: socket where the buffers should be enqueued
 * @dport: port number for the socket
2176 2177 2178
 *
 * Caller must hold socket lock
 */
2179
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
J
Jon Paul Maloy 已提交
2180
			    u32 dport, struct sk_buff_head *xmitq)
2181
{
J
Jon Paul Maloy 已提交
2182 2183
	unsigned long time_limit = jiffies + 2;
	struct sk_buff *skb;
2184 2185
	unsigned int lim;
	atomic_t *dcnt;
J
Jon Paul Maloy 已提交
2186
	u32 onode;
2187 2188

	while (skb_queue_len(inputq)) {
2189
		if (unlikely(time_after_eq(jiffies, time_limit)))
2190 2191
			return;

2192 2193
		skb = tipc_skb_dequeue(inputq, dport);
		if (unlikely(!skb))
2194 2195 2196
			return;

		/* Add message directly to receive queue if possible */
2197
		if (!sock_owned_by_user(sk)) {
J
Jon Maloy 已提交
2198
			tipc_sk_filter_rcv(sk, skb, xmitq);
2199
			continue;
2200
		}
2201 2202

		/* Try backlog, compensating for double-counted bytes */
2203
		dcnt = &tipc_sk(sk)->dupl_rcvcnt;
2204
		if (!sk->sk_backlog.len)
2205 2206 2207 2208
			atomic_set(dcnt, 0);
		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
		if (likely(!sk_add_backlog(sk, skb, lim)))
			continue;
2209 2210

		/* Overload => reject message back to sender */
J
Jon Paul Maloy 已提交
2211
		onode = tipc_own_addr(sock_net(sk));
2212
		atomic_inc(&sk->sk_drops);
J
Jon Paul Maloy 已提交
2213 2214
		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
			__skb_queue_tail(xmitq, skb);
2215
		break;
2216
	}
2217 2218
}

2219
/**
2220 2221 2222 2223
 * tipc_sk_rcv - handle a chain of incoming buffers
 * @inputq: buffer list containing the buffers
 * Consumes all buffers in list until inputq is empty
 * Note: may be called in multiple threads referring to the same queue
2224
 */
2225
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
2226
{
J
Jon Paul Maloy 已提交
2227
	struct sk_buff_head xmitq;
2228
	u32 dnode, dport = 0;
E
Erik Hugne 已提交
2229
	int err;
2230 2231
	struct tipc_sock *tsk;
	struct sock *sk;
2232
	struct sk_buff *skb;
2233

J
Jon Paul Maloy 已提交
2234
	__skb_queue_head_init(&xmitq);
2235 2236 2237
	while (skb_queue_len(inputq)) {
		dport = tipc_skb_peek_port(inputq, dport);
		tsk = tipc_sk_lookup(net, dport);
2238

2239 2240 2241
		if (likely(tsk)) {
			sk = &tsk->sk;
			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
J
Jon Paul Maloy 已提交
2242
				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
2243 2244
				spin_unlock_bh(&sk->sk_lock.slock);
			}
J
Jon Paul Maloy 已提交
2245
			/* Send pending response/rejected messages, if any */
2246
			tipc_node_distr_xmit(sock_net(sk), &xmitq);
2247 2248 2249
			sock_put(sk);
			continue;
		}
2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261
		/* No destination socket => dequeue skb if still there */
		skb = tipc_skb_dequeue(inputq, dport);
		if (!skb)
			return;

		/* Try secondary lookup if unresolved named message */
		err = TIPC_ERR_NO_PORT;
		if (tipc_msg_lookup_dest(net, skb, &err))
			goto xmit;

		/* Prepare for message rejection */
		if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
2262
			continue;
2263
xmit:
2264
		dnode = msg_destnode(buf_msg(skb));
2265
		tipc_node_xmit_skb(net, skb, dnode, dport);
2266
	}
P
Per Liden 已提交
2267 2268
}

Y
Ying Xue 已提交
2269 2270
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
W
WANG Cong 已提交
2271
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
Y
Ying Xue 已提交
2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283
	struct sock *sk = sock->sk;
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (!*timeo_p)
			return -ETIMEDOUT;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

W
WANG Cong 已提交
2284
		add_wait_queue(sk_sleep(sk), &wait);
2285
		done = sk_wait_event(sk, timeo_p,
W
WANG Cong 已提交
2286 2287
				     sk->sk_state != TIPC_CONNECTING, &wait);
		remove_wait_queue(sk_sleep(sk), &wait);
Y
Ying Xue 已提交
2288 2289 2290 2291
	} while (!done);
	return 0;
}

P
Per Liden 已提交
2292
/**
2293
 * tipc_connect - establish a connection to another TIPC port
P
Per Liden 已提交
2294 2295 2296
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
2297
 * @flags: file-related flags associated with socket
P
Per Liden 已提交
2298 2299 2300
 *
 * Returns 0 on success, errno otherwise
 */
2301 2302
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
			int destlen, int flags)
P
Per Liden 已提交
2303
{
2304
	struct sock *sk = sock->sk;
2305
	struct tipc_sock *tsk = tipc_sk(sk);
2306 2307
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
2308
	long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
2309
	int previous;
2310
	int res = 0;
2311

2312 2313 2314
	if (destlen != sizeof(struct sockaddr_tipc))
		return -EINVAL;

2315 2316
	lock_sock(sk);

J
Jon Maloy 已提交
2317 2318 2319 2320 2321
	if (tsk->group) {
		res = -EINVAL;
		goto exit;
	}

2322 2323 2324
	if (dst->family == AF_UNSPEC) {
		memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
		if (!tipc_sk_type_connectionless(sk))
2325
			res = -EINVAL;
2326
		goto exit;
2327 2328
	} else if (dst->family != AF_TIPC) {
		res = -EINVAL;
2329
	}
2330
	if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
2331
		res = -EINVAL;
2332 2333 2334 2335 2336 2337
	if (res)
		goto exit;

	/* DGRAM/RDM connect(), just save the destaddr */
	if (tipc_sk_type_connectionless(sk)) {
		memcpy(&tsk->peer, dest, destlen);
2338 2339 2340
		goto exit;
	}

2341
	previous = sk->sk_state;
2342 2343 2344

	switch (sk->sk_state) {
	case TIPC_OPEN:
2345 2346 2347 2348 2349 2350 2351 2352 2353 2354
		/* Send a 'SYN-' to destination */
		m.msg_name = dest;
		m.msg_namelen = destlen;

		/* If connect is in non-blocking case, set MSG_DONTWAIT to
		 * indicate send_msg() is never blocked.
		 */
		if (!timeout)
			m.msg_flags = MSG_DONTWAIT;

2355
		res = __tipc_sendmsg(sock, &m, 0);
2356 2357 2358
		if ((res < 0) && (res != -EWOULDBLOCK))
			goto exit;

2359
		/* Just entered TIPC_CONNECTING state; the only
2360 2361 2362 2363
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		res = -EINPROGRESS;
2364 2365 2366 2367 2368
		/* fall thru' */
	case TIPC_CONNECTING:
		if (!timeout) {
			if (previous == TIPC_CONNECTING)
				res = -EALREADY;
Y
Ying Xue 已提交
2369
			goto exit;
2370
		}
Y
Ying Xue 已提交
2371 2372 2373
		timeout = msecs_to_jiffies(timeout);
		/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
		res = tipc_wait_for_connect(sock, &timeout);
2374 2375
		break;
	case TIPC_ESTABLISHED:
2376
		res = -EISCONN;
2377 2378
		break;
	default:
2379
		res = -EINVAL;
2380
	}
2381

2382 2383
exit:
	release_sock(sk);
2384
	return res;
P
Per Liden 已提交
2385 2386
}

2387
/**
2388
 * tipc_listen - allow socket to listen for incoming connections
P
Per Liden 已提交
2389 2390
 * @sock: socket structure
 * @len: (unused)
2391
 *
P
Per Liden 已提交
2392 2393
 * Returns 0 on success, errno otherwise
 */
2394
static int tipc_listen(struct socket *sock, int len)
P
Per Liden 已提交
2395
{
2396 2397 2398 2399
	struct sock *sk = sock->sk;
	int res;

	lock_sock(sk);
2400
	res = tipc_set_sk_state(sk, TIPC_LISTEN);
2401
	release_sock(sk);
2402

2403
	return res;
P
Per Liden 已提交
2404 2405
}

Y
Ying Xue 已提交
2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419
/**
 * tipc_wait_for_accept - sleep until the listening socket has a queued message
 * @sock: listening socket
 * @timeo: maximum time to wait, as passed to schedule_timeout()
 *
 * Called with the socket lock held; the lock is dropped while sleeping and
 * re-taken before the queue is examined again.
 *
 * Returns 0 when the receive queue is non-empty, -EAGAIN when the timeout
 * is exhausted, or the value of sock_intr_errno() when a signal is pending.
 */
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int err;

	/* True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	*/
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

2439
/**
2440
 * tipc_accept - wait for connection request
P
Per Liden 已提交
2441 2442 2443
 * @sock: listening socket
 * @newsock: new socket that is to be connected
 * @flags: file-related flags associated with socket
2444
 *
P
Per Liden 已提交
2445 2446
 * Returns 0 on success, errno otherwise
 */
2447 2448
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
		       bool kern)
P
Per Liden 已提交
2449
{
2450
	struct sock *new_sk, *sk = sock->sk;
P
Per Liden 已提交
2451
	struct sk_buff *buf;
2452
	struct tipc_sock *new_tsock;
2453
	struct tipc_msg *msg;
Y
Ying Xue 已提交
2454
	long timeo;
2455
	int res;
P
Per Liden 已提交
2456

2457
	lock_sock(sk);
P
Per Liden 已提交
2458

2459
	if (sk->sk_state != TIPC_LISTEN) {
2460
		res = -EINVAL;
P
Per Liden 已提交
2461 2462
		goto exit;
	}
Y
Ying Xue 已提交
2463 2464 2465 2466
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	res = tipc_wait_for_accept(sock, timeo);
	if (res)
		goto exit;
2467 2468 2469

	buf = skb_peek(&sk->sk_receive_queue);

2470
	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
2471 2472
	if (res)
		goto exit;
2473
	security_sk_clone(sock->sk, new_sock->sk);
P
Per Liden 已提交
2474

2475
	new_sk = new_sock->sk;
2476
	new_tsock = tipc_sk(new_sk);
2477
	msg = buf_msg(buf);
P
Per Liden 已提交
2478

2479 2480 2481 2482 2483 2484 2485
	/* we lock on new_sk; but lockdep sees the lock on sk */
	lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);

	/*
	 * Reject any stray messages received by new socket
	 * before the socket lock was taken (very, very unlikely)
	 */
2486
	tsk_rej_rx_queue(new_sk);
2487 2488

	/* Connect new socket to it's peer */
2489
	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
2490

2491
	tsk_set_importance(new_tsock, msg_importance(msg));
2492
	if (msg_named(msg)) {
2493 2494
		new_tsock->conn_type = msg_nametype(msg);
		new_tsock->conn_instance = msg_nameinst(msg);
P
Per Liden 已提交
2495
	}
2496 2497 2498 2499 2500 2501 2502 2503

	/*
	 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
	 * Respond to 'SYN+' by queuing it on new socket.
	 */
	if (!msg_data_sz(msg)) {
		struct msghdr m = {NULL,};

2504
		tsk_advance_rx_queue(sk);
2505
		__tipc_sendstream(new_sock, &m, 0);
2506 2507 2508
	} else {
		__skb_dequeue(&sk->sk_receive_queue);
		__skb_queue_head(&new_sk->sk_receive_queue, buf);
2509
		skb_set_owner_r(buf, new_sk);
2510 2511
	}
	release_sock(new_sk);
P
Per Liden 已提交
2512
exit:
2513
	release_sock(sk);
P
Per Liden 已提交
2514 2515 2516 2517
	return res;
}

/**
2518
 * tipc_shutdown - shutdown socket connection
P
Per Liden 已提交
2519
 * @sock: socket structure
2520
 * @how: direction to close (must be SHUT_RDWR)
P
Per Liden 已提交
2521 2522
 *
 * Terminates connection (if necessary), then purges socket's receive queue.
2523
 *
P
Per Liden 已提交
2524 2525
 * Returns 0 on success, errno otherwise
 */
2526
static int tipc_shutdown(struct socket *sock, int how)
P
Per Liden 已提交
2527
{
2528
	struct sock *sk = sock->sk;
P
Per Liden 已提交
2529 2530
	int res;

2531 2532
	if (how != SHUT_RDWR)
		return -EINVAL;
P
Per Liden 已提交
2533

2534
	lock_sock(sk);
P
Per Liden 已提交
2535

2536 2537
	__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
	sk->sk_shutdown = SEND_SHUTDOWN;
P
Per Liden 已提交
2538

2539
	if (sk->sk_state == TIPC_DISCONNECTING) {
2540
		/* Discard any unreceived messages */
2541
		__skb_queue_purge(&sk->sk_receive_queue);
2542 2543 2544

		/* Wake up anyone sleeping in poll */
		sk->sk_state_change(sk);
P
Per Liden 已提交
2545
		res = 0;
2546
	} else {
P
Per Liden 已提交
2547 2548 2549
		res = -ENOTCONN;
	}

2550
	release_sock(sk);
P
Per Liden 已提交
2551 2552 2553
	return res;
}

2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579
/**
 * tipc_sk_check_probing_state - abort the connection or queue a new probe
 * @sk: socket
 * @list: queue on which a newly created probe message is placed
 *
 * If the probe_unacked flag set by an earlier pass is still set, the socket
 * is moved to TIPC_DISCONNECTING with ECONNABORTED. Otherwise a CONN_PROBE
 * message is created, the flag is set and the socket timer is re-armed
 * CONN_PROBING_INTV jiffies ahead.
 */
static void tipc_sk_check_probing_state(struct sock *sk,
					struct sk_buff_head *list)
{
	struct tipc_sock *tsk = tipc_sk(sk);
	u32 peer_node = tsk_peer_node(tsk);
	u32 peer_port = tsk_peer_port(tsk);
	u32 own_node = tsk_own_node(tsk);
	u32 own_port = tsk->portid;
	struct sk_buff *probe;

	if (tsk->probe_unacked) {
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
		sk->sk_err = ECONNABORTED;
		/* NOTE(review): tipc_sk_filter_connect() passes tsk->portid as
		 * the last argument here; confirm the peer port is intended.
		 */
		tipc_node_remove_conn(sock_net(sk), peer_node, peer_port);
		sk->sk_state_change(sk);
		return;
	}

	/* Prepare new probe */
	probe = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
				peer_node, own_node, peer_port, own_port,
				TIPC_OK);
	if (probe)
		__skb_queue_tail(list, probe);

	/* Flag is set and timer re-armed even if no probe could be created */
	tsk->probe_unacked = true;
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
}

2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592
static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list)
{
	struct tipc_sock *tsk = tipc_sk(sk);

	/* Try again later if dest link is congested */
	if (tsk->cong_link_cnt) {
		sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
		return;
	}
	/* Prepare SYN for retransmit */
	tipc_msg_skb_clone(&sk->sk_write_queue, list);
}

2593
static void tipc_sk_timeout(struct timer_list *t)
2594
{
2595 2596
	struct sock *sk = from_timer(sk, t, sk_timer);
	struct tipc_sock *tsk = tipc_sk(sk);
2597 2598
	u32 pnode = tsk_peer_node(tsk);
	struct sk_buff_head list;
2599
	int rc = 0;
2600

2601
	skb_queue_head_init(&list);
J
Jon Paul Maloy 已提交
2602
	bh_lock_sock(sk);
2603 2604 2605 2606

	/* Try again later if socket is busy */
	if (sock_owned_by_user(sk)) {
		sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
2607 2608
		bh_unlock_sock(sk);
		return;
2609 2610
	}

2611 2612
	if (sk->sk_state == TIPC_ESTABLISHED)
		tipc_sk_check_probing_state(sk, &list);
2613 2614
	else if (sk->sk_state == TIPC_CONNECTING)
		tipc_sk_retry_connect(sk, &list);
2615

2616
	bh_unlock_sock(sk);
2617 2618

	if (!skb_queue_empty(&list))
2619
		rc = tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid);
2620

2621 2622 2623 2624 2625
	/* SYN messages may cause link congestion */
	if (rc == -ELINKCONG) {
		tipc_dest_push(&tsk->cong_links, pnode, 0);
		tsk->cong_link_cnt = 1;
	}
2626
	sock_put(sk);
2627 2628
}

2629
/**
 * tipc_sk_publish - bind a name sequence to the socket
 * @tsk: TIPC socket
 * @scope: requested scope; anything other than TIPC_NODE_SCOPE is treated
 *         as TIPC_CLUSTER_SCOPE
 * @seq: name sequence (type, lower, upper) to publish
 *
 * Returns 0 on success, -EINVAL if the socket is connected or the name
 * table refuses the publication, -EADDRINUSE if the generated key equals
 * the socket's port id.
 */
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq const *seq)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct publication *publ;
	u32 key;

	if (scope != TIPC_NODE_SCOPE)
		scope = TIPC_CLUSTER_SCOPE;

	if (tipc_sk_connected(sk))
		return -EINVAL;
	/* Key is derived from the port id and the publication counter */
	key = tsk->portid + tsk->pub_count + 1;
	if (key == tsk->portid)
		return -EADDRINUSE;

	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
				    scope, tsk->portid, key);
	if (unlikely(!publ))
		return -EINVAL;

	list_add(&publ->binding_sock, &tsk->publications);
	tsk->pub_count++;
	tsk->published = 1;
	return 0;
}

2657
/**
 * tipc_sk_withdraw - withdraw one or all name publications of the socket
 * @tsk: TIPC socket
 * @scope: scope to match when @seq is given; anything other than
 *         TIPC_NODE_SCOPE is treated as TIPC_CLUSTER_SCOPE
 * @seq: name sequence to withdraw, or NULL to withdraw every publication
 *
 * With @seq set, the walk ends at the first entry whose scope, type and
 * lower bound match: it is withdrawn if its upper bound matches as well,
 * otherwise nothing is withdrawn.
 *
 * Returns 0 if at least one publication was withdrawn, -EINVAL otherwise.
 */
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq const *seq)
{
	struct net *net = sock_net(&tsk->sk);
	struct publication *publ;
	struct publication *safe;
	int rc = -EINVAL;

	if (scope != TIPC_NODE_SCOPE)
		scope = TIPC_CLUSTER_SCOPE;

	list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
		if (seq) {
			if (publ->scope != scope)
				continue;
			if (publ->type != seq->type)
				continue;
			if (publ->lower != seq->lower)
				continue;
			if (publ->upper != seq->upper)
				break;
			tipc_nametbl_withdraw(net, publ->type, publ->lower,
					      publ->upper, publ->key);
			rc = 0;
			break;
		}
		tipc_nametbl_withdraw(net, publ->type, publ->lower,
				      publ->upper, publ->key);
		rc = 0;
	}
	if (list_empty(&tsk->publications))
		tsk->published = 0;
	return rc;
}

2692 2693 2694
/* tipc_sk_reinit: set non-zero address in all existing sockets
 *                 when we go from standalone to network mode.
 */
2695
void tipc_sk_reinit(struct net *net)
2696
{
2697
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2698
	struct rhashtable_iter iter;
2699
	struct tipc_sock *tsk;
2700 2701
	struct tipc_msg *msg;

2702 2703 2704
	rhashtable_walk_enter(&tn->sk_rht, &iter);

	do {
2705
		rhashtable_walk_start(&iter);
2706 2707

		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
2708 2709
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			msg = &tsk->phdr;
2710 2711
			msg_set_prevnode(msg, tipc_own_addr(net));
			msg_set_orignode(msg, tipc_own_addr(net));
2712 2713
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
2714

2715 2716
		rhashtable_walk_stop(&iter);
	} while (tsk == ERR_PTR(-EAGAIN));
2717 2718

	rhashtable_walk_exit(&iter);
2719 2720
}

2721
/**
 * tipc_sk_lookup - find the socket bound to a port id
 * @net: network namespace whose socket table is searched
 * @portid: port id used as hash key
 *
 * Returns the socket with an extra reference held (the caller must release
 * it with sock_put()), or NULL if no socket uses @portid.
 */
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_sock *tsk;

	rcu_read_lock();
	tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params);
	if (tsk)
		sock_hold(&tsk->sk);
	rcu_read_unlock();

	return tsk;
}

2735
static int tipc_sk_insert(struct tipc_sock *tsk)
2736
{
2737 2738 2739
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2740 2741
	u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
	u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;
2742

2743 2744 2745 2746 2747 2748
	while (remaining--) {
		portid++;
		if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT))
			portid = TIPC_MIN_PORT;
		tsk->portid = portid;
		sock_hold(&tsk->sk);
2749 2750
		if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
						   tsk_rht_params))
2751 2752
			return 0;
		sock_put(&tsk->sk);
2753 2754
	}

2755
	return -1;
2756 2757
}

2758
static void tipc_sk_remove(struct tipc_sock *tsk)
2759
{
2760
	struct sock *sk = &tsk->sk;
2761
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
2762

2763
	if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
2764
		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
2765
		__sock_put(sk);
2766 2767 2768
	}
}

2769 2770 2771 2772 2773 2774 2775
static const struct rhashtable_params tsk_rht_params = {
	.nelem_hint = 192,
	.head_offset = offsetof(struct tipc_sock, node),
	.key_offset = offsetof(struct tipc_sock, portid),
	.key_len = sizeof(u32), /* portid */
	.max_size = 1048576,
	.min_size = 256,
2776
	.automatic_shrinking = true,
2777 2778
};

2779
int tipc_sk_rht_init(struct net *net)
2780
{
2781
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2782 2783

	return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
2784 2785
}

2786
void tipc_sk_rht_destroy(struct net *net)
2787
{
2788 2789
	struct tipc_net *tn = net_generic(net, tipc_net_id);

2790 2791
	/* Wait for socket readers to complete */
	synchronize_net();
2792

2793
	rhashtable_destroy(&tn->sk_rht);
2794 2795
}

J
Jon Maloy 已提交
2796 2797 2798 2799 2800 2801 2802 2803 2804 2805
static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
{
	struct net *net = sock_net(&tsk->sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq seq;
	int rc;

	if (mreq->type < TIPC_RESERVED_TYPES)
		return -EACCES;
2806 2807
	if (mreq->scope > TIPC_NODE_SCOPE)
		return -EINVAL;
J
Jon Maloy 已提交
2808 2809
	if (grp)
		return -EACCES;
2810
	grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open);
J
Jon Maloy 已提交
2811 2812 2813 2814 2815 2816 2817 2818 2819
	if (!grp)
		return -ENOMEM;
	tsk->group = grp;
	msg_set_lookup_scope(hdr, mreq->scope);
	msg_set_nametype(hdr, mreq->type);
	msg_set_dest_droppable(hdr, true);
	seq.type = mreq->type;
	seq.lower = mreq->instance;
	seq.upper = seq.lower;
2820
	tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
J
Jon Maloy 已提交
2821
	rc = tipc_sk_publish(tsk, mreq->scope, &seq);
C
Cong Wang 已提交
2822
	if (rc) {
J
Jon Maloy 已提交
2823
		tipc_group_delete(net, grp);
C
Cong Wang 已提交
2824
		tsk->group = NULL;
2825
		return rc;
C
Cong Wang 已提交
2826
	}
2827
	/* Eliminate any risk that a broadcast overtakes sent JOINs */
2828 2829
	tsk->mc_method.rcast = true;
	tsk->mc_method.mandatory = true;
2830
	tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf);
J
Jon Maloy 已提交
2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849
	return rc;
}

static int tipc_sk_leave(struct tipc_sock *tsk)
{
	struct net *net = sock_net(&tsk->sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_name_seq seq;
	int scope;

	if (!grp)
		return -EINVAL;
	tipc_group_self(grp, &seq, &scope);
	tipc_group_delete(net, grp);
	tsk->group = NULL;
	tipc_sk_withdraw(tsk, scope, &seq);
	return 0;
}

P
Per Liden 已提交
2850
/**
2851
 * tipc_setsockopt - set socket option
P
Per Liden 已提交
2852 2853 2854 2855 2856
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: pointer to new option value
 * @ol: length of option value
2857 2858
 *
 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
P
Per Liden 已提交
2859
 * (to ease compatibility).
2860
 *
P
Per Liden 已提交
2861 2862
 * Returns 0 on success, errno otherwise
 */
2863 2864
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, unsigned int ol)
P
Per Liden 已提交
2865
{
2866
	struct sock *sk = sock->sk;
2867
	struct tipc_sock *tsk = tipc_sk(sk);
J
Jon Maloy 已提交
2868
	struct tipc_group_req mreq;
2869
	u32 value = 0;
2870
	int res = 0;
P
Per Liden 已提交
2871

2872 2873
	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return 0;
P
Per Liden 已提交
2874 2875
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
2876 2877 2878 2879 2880 2881 2882 2883

	switch (opt) {
	case TIPC_IMPORTANCE:
	case TIPC_SRC_DROPPABLE:
	case TIPC_DEST_DROPPABLE:
	case TIPC_CONN_TIMEOUT:
		if (ol < sizeof(value))
			return -EINVAL;
J
Jon Maloy 已提交
2884 2885 2886 2887 2888 2889 2890 2891
		if (get_user(value, (u32 __user *)ov))
			return -EFAULT;
		break;
	case TIPC_GROUP_JOIN:
		if (ol < sizeof(mreq))
			return -EINVAL;
		if (copy_from_user(&mreq, ov, sizeof(mreq)))
			return -EFAULT;
2892 2893 2894 2895 2896
		break;
	default:
		if (ov || ol)
			return -EINVAL;
	}
P
Per Liden 已提交
2897

2898
	lock_sock(sk);
2899

P
Per Liden 已提交
2900 2901
	switch (opt) {
	case TIPC_IMPORTANCE:
2902
		res = tsk_set_importance(tsk, value);
P
Per Liden 已提交
2903 2904 2905
		break;
	case TIPC_SRC_DROPPABLE:
		if (sock->type != SOCK_STREAM)
2906
			tsk_set_unreliable(tsk, value);
2907
		else
P
Per Liden 已提交
2908 2909 2910
			res = -ENOPROTOOPT;
		break;
	case TIPC_DEST_DROPPABLE:
2911
		tsk_set_unreturnable(tsk, value);
P
Per Liden 已提交
2912 2913
		break;
	case TIPC_CONN_TIMEOUT:
2914
		tipc_sk(sk)->conn_timeout = value;
P
Per Liden 已提交
2915
		break;
2916 2917 2918 2919 2920 2921 2922 2923
	case TIPC_MCAST_BROADCAST:
		tsk->mc_method.rcast = false;
		tsk->mc_method.mandatory = true;
		break;
	case TIPC_MCAST_REPLICAST:
		tsk->mc_method.rcast = true;
		tsk->mc_method.mandatory = true;
		break;
J
Jon Maloy 已提交
2924 2925 2926 2927 2928 2929
	case TIPC_GROUP_JOIN:
		res = tipc_sk_join(tsk, &mreq);
		break;
	case TIPC_GROUP_LEAVE:
		res = tipc_sk_leave(tsk);
		break;
P
Per Liden 已提交
2930 2931 2932 2933
	default:
		res = -EINVAL;
	}

2934 2935
	release_sock(sk);

P
Per Liden 已提交
2936 2937 2938 2939
	return res;
}

/**
2940
 * tipc_getsockopt - get socket option
P
Per Liden 已提交
2941 2942 2943 2944 2945
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: receptacle for option value
 * @ol: receptacle for length of option value
2946 2947
 *
 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
P
Per Liden 已提交
2948
 * (to ease compatibility).
2949
 *
P
Per Liden 已提交
2950 2951
 * Returns 0 on success, errno otherwise
 */
2952 2953
static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, int __user *ol)
P
Per Liden 已提交
2954
{
2955
	struct sock *sk = sock->sk;
2956
	struct tipc_sock *tsk = tipc_sk(sk);
J
Jon Maloy 已提交
2957 2958
	struct tipc_name_seq seq;
	int len, scope;
P
Per Liden 已提交
2959
	u32 value;
2960
	int res;
P
Per Liden 已提交
2961

2962 2963
	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return put_user(0, ol);
P
Per Liden 已提交
2964 2965
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
2966 2967
	res = get_user(len, ol);
	if (res)
2968
		return res;
P
Per Liden 已提交
2969

2970
	lock_sock(sk);
P
Per Liden 已提交
2971 2972 2973

	switch (opt) {
	case TIPC_IMPORTANCE:
2974
		value = tsk_importance(tsk);
P
Per Liden 已提交
2975 2976
		break;
	case TIPC_SRC_DROPPABLE:
2977
		value = tsk_unreliable(tsk);
P
Per Liden 已提交
2978 2979
		break;
	case TIPC_DEST_DROPPABLE:
2980
		value = tsk_unreturnable(tsk);
P
Per Liden 已提交
2981 2982
		break;
	case TIPC_CONN_TIMEOUT:
2983
		value = tsk->conn_timeout;
2984
		/* no need to set "res", since already 0 at this point */
P
Per Liden 已提交
2985
		break;
2986
	case TIPC_NODE_RECVQ_DEPTH:
2987
		value = 0; /* was tipc_queue_size, now obsolete */
2988
		break;
2989
	case TIPC_SOCK_RECVQ_DEPTH:
2990 2991
		value = skb_queue_len(&sk->sk_receive_queue);
		break;
J
Jon Maloy 已提交
2992 2993 2994 2995 2996 2997
	case TIPC_GROUP_JOIN:
		seq.type = 0;
		if (tsk->group)
			tipc_group_self(tsk->group, &seq, &scope);
		value = seq.type;
		break;
P
Per Liden 已提交
2998 2999 3000 3001
	default:
		res = -EINVAL;
	}

3002 3003
	release_sock(sk);

3004 3005
	if (res)
		return res;	/* "get" failed */
P
Per Liden 已提交
3006

3007 3008 3009 3010 3011 3012 3013
	if (len < sizeof(value))
		return -EINVAL;

	if (copy_to_user(ov, &value, sizeof(value)))
		return -EFAULT;

	return put_user(sizeof(value), ol);
P
Per Liden 已提交
3014 3015
}

3016
static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
E
Erik Hugne 已提交
3017
{
3018 3019
	struct net *net = sock_net(sock->sk);
	struct tipc_sioc_nodeid_req nr = {0};
E
Erik Hugne 已提交
3020 3021 3022 3023 3024 3025 3026
	struct tipc_sioc_ln_req lnr;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case SIOCGETLINKNAME:
		if (copy_from_user(&lnr, argp, sizeof(lnr)))
			return -EFAULT;
3027
		if (!tipc_node_get_linkname(net,
3028
					    lnr.bearer_id & 0xffff, lnr.peer,
E
Erik Hugne 已提交
3029 3030 3031 3032 3033 3034
					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
			if (copy_to_user(argp, &lnr, sizeof(lnr)))
				return -EFAULT;
			return 0;
		}
		return -EADDRNOTAVAIL;
3035 3036 3037 3038 3039 3040 3041 3042
	case SIOCGETNODEID:
		if (copy_from_user(&nr, argp, sizeof(nr)))
			return -EFAULT;
		if (!tipc_node_get_id(net, nr.peer, nr.node_id))
			return -EADDRNOTAVAIL;
		if (copy_to_user(argp, &nr, sizeof(nr)))
			return -EFAULT;
		return 0;
E
Erik Hugne 已提交
3043 3044 3045 3046 3047
	default:
		return -ENOIOCTLCMD;
	}
}

3048 3049 3050 3051
static int tipc_socketpair(struct socket *sock1, struct socket *sock2)
{
	struct tipc_sock *tsk2 = tipc_sk(sock2->sk);
	struct tipc_sock *tsk1 = tipc_sk(sock1->sk);
E
Erik Hugne 已提交
3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066
	u32 onode = tipc_own_addr(sock_net(sock1->sk));

	tsk1->peer.family = AF_TIPC;
	tsk1->peer.addrtype = TIPC_ADDR_ID;
	tsk1->peer.scope = TIPC_NODE_SCOPE;
	tsk1->peer.addr.id.ref = tsk2->portid;
	tsk1->peer.addr.id.node = onode;
	tsk2->peer.family = AF_TIPC;
	tsk2->peer.addrtype = TIPC_ADDR_ID;
	tsk2->peer.scope = TIPC_NODE_SCOPE;
	tsk2->peer.addr.id.ref = tsk1->portid;
	tsk2->peer.addr.id.node = onode;

	tipc_sk_finish_conn(tsk1, tsk2->portid, onode);
	tipc_sk_finish_conn(tsk2, tsk1->portid, onode);
3067 3068 3069
	return 0;
}

3070 3071
/* Protocol switches for the various types of TIPC sockets */

3072
static const struct proto_ops msg_ops = {
3073
	.owner		= THIS_MODULE,
P
Per Liden 已提交
3074
	.family		= AF_TIPC,
3075 3076 3077
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
E
Erik Hugne 已提交
3078
	.socketpair	= tipc_socketpair,
3079
	.accept		= sock_no_accept,
3080
	.getname	= tipc_getname,
3081
	.poll		= tipc_poll,
E
Erik Hugne 已提交
3082
	.ioctl		= tipc_ioctl,
3083
	.listen		= sock_no_listen,
3084 3085 3086 3087 3088
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendmsg,
	.recvmsg	= tipc_recvmsg,
3089 3090
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
P
Per Liden 已提交
3091 3092
};

3093
static const struct proto_ops packet_ops = {
3094
	.owner		= THIS_MODULE,
P
Per Liden 已提交
3095
	.family		= AF_TIPC,
3096 3097 3098
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
3099
	.socketpair	= tipc_socketpair,
3100 3101
	.accept		= tipc_accept,
	.getname	= tipc_getname,
3102
	.poll		= tipc_poll,
E
Erik Hugne 已提交
3103
	.ioctl		= tipc_ioctl,
3104 3105 3106 3107 3108 3109
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_packet,
	.recvmsg	= tipc_recvmsg,
3110 3111
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
P
Per Liden 已提交
3112 3113
};

3114
static const struct proto_ops stream_ops = {
3115
	.owner		= THIS_MODULE,
P
Per Liden 已提交
3116
	.family		= AF_TIPC,
3117 3118 3119
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
3120
	.socketpair	= tipc_socketpair,
3121 3122
	.accept		= tipc_accept,
	.getname	= tipc_getname,
3123
	.poll		= tipc_poll,
E
Erik Hugne 已提交
3124
	.ioctl		= tipc_ioctl,
3125 3126 3127 3128
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
3129
	.sendmsg	= tipc_sendstream,
3130
	.recvmsg	= tipc_recvstream,
3131 3132
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
P
Per Liden 已提交
3133 3134
};

3135
static const struct net_proto_family tipc_family_ops = {
3136
	.owner		= THIS_MODULE,
P
Per Liden 已提交
3137
	.family		= AF_TIPC,
3138
	.create		= tipc_sk_create
P
Per Liden 已提交
3139 3140 3141 3142 3143
};

static struct proto tipc_proto = {
	.name		= "TIPC",
	.owner		= THIS_MODULE,
3144 3145
	.obj_size	= sizeof(struct tipc_sock),
	.sysctl_rmem	= sysctl_tipc_rmem
P
Per Liden 已提交
3146 3147 3148
};

/**
3149
 * tipc_socket_init - initialize TIPC socket interface
3150
 *
P
Per Liden 已提交
3151 3152
 * Returns 0 on success, errno otherwise
 */
3153
int tipc_socket_init(void)
P
Per Liden 已提交
3154 3155 3156
{
	int res;

3157
	res = proto_register(&tipc_proto, 1);
P
Per Liden 已提交
3158
	if (res) {
3159
		pr_err("Failed to register TIPC protocol type\n");
P
Per Liden 已提交
3160 3161 3162 3163 3164
		goto out;
	}

	res = sock_register(&tipc_family_ops);
	if (res) {
3165
		pr_err("Failed to register TIPC socket type\n");
P
Per Liden 已提交
3166 3167 3168 3169 3170 3171 3172 3173
		proto_unregister(&tipc_proto);
		goto out;
	}
 out:
	return res;
}

/**
3174
 * tipc_socket_stop - stop TIPC socket interface
P
Per Liden 已提交
3175
 */
3176
void tipc_socket_stop(void)
P
Per Liden 已提交
3177 3178 3179 3180
{
	sock_unregister(tipc_family_ops.family);
	proto_unregister(&tipc_proto);
}
3181 3182

/* Caller should hold socket lock for the passed tipc socket. */
3183
static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216
{
	u32 peer_node;
	u32 peer_port;
	struct nlattr *nest;

	peer_node = tsk_peer_node(tsk);
	peer_port = tsk_peer_port(tsk);

	nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);

	if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
		goto msg_full;
	if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
		goto msg_full;

	if (tsk->conn_type != 0) {
		if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
			goto msg_full;
	}
	nla_nest_end(skb, nest);

	return 0;

msg_full:
	nla_nest_cancel(skb, nest);

	return -EMSGSIZE;
}

3217 3218 3219 3220 3221 3222 3223
static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock
			  *tsk)
{
	struct net *net = sock_net(skb->sk);
	struct sock *sk = &tsk->sk;

	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) ||
3224
	    nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net)))
3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236
		return -EMSGSIZE;

	if (tipc_sk_connected(sk)) {
		if (__tipc_nl_add_sk_con(skb, tsk))
			return -EMSGSIZE;
	} else if (!list_empty(&tsk->publications)) {
		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
			return -EMSGSIZE;
	}
	return 0;
}

3237
/* Caller should hold socket lock for the passed tipc socket. */
3238 3239
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tipc_sock *tsk)
3240 3241
{
	struct nlattr *attrs;
3242
	void *hdr;
3243 3244

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3245
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
3246 3247 3248 3249 3250 3251
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto genlmsg_cancel;
3252 3253

	if (__tipc_nl_add_sk_info(skb, tsk))
3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

3269 3270 3271 3272
int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
		    int (*skb_handler)(struct sk_buff *skb,
				       struct netlink_callback *cb,
				       struct tipc_sock *tsk))
3273
{
3274
	struct rhashtable_iter *iter = (void *)cb->args[4];
3275 3276
	struct tipc_sock *tsk;
	int err;
3277

C
Cong Wang 已提交
3278 3279 3280 3281 3282 3283
	rhashtable_walk_start(iter);
	while ((tsk = rhashtable_walk_next(iter)) != NULL) {
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			if (err == -EAGAIN) {
				err = 0;
3284 3285
				continue;
			}
C
Cong Wang 已提交
3286 3287
			break;
		}
3288

C
Cong Wang 已提交
3289 3290 3291 3292 3293 3294 3295 3296
		sock_hold(&tsk->sk);
		rhashtable_walk_stop(iter);
		lock_sock(&tsk->sk);
		err = skb_handler(skb, cb, tsk);
		if (err) {
			release_sock(&tsk->sk);
			sock_put(&tsk->sk);
			goto out;
3297
		}
C
Cong Wang 已提交
3298 3299 3300
		release_sock(&tsk->sk);
		rhashtable_walk_start(iter);
		sock_put(&tsk->sk);
3301
	}
C
Cong Wang 已提交
3302
	rhashtable_walk_stop(iter);
3303
out:
3304 3305
	return skb->len;
}
3306 3307
EXPORT_SYMBOL(tipc_nl_sk_walk);

C
Cong Wang 已提交
3308 3309
int tipc_dump_start(struct netlink_callback *cb)
{
3310 3311 3312 3313 3314 3315 3316 3317
	return __tipc_dump_start(cb, sock_net(cb->skb->sk));
}
EXPORT_SYMBOL(tipc_dump_start);

int __tipc_dump_start(struct netlink_callback *cb, struct net *net)
{
	/* tipc_nl_name_table_dump() uses cb->args[0...3]. */
	struct rhashtable_iter *iter = (void *)cb->args[4];
C
Cong Wang 已提交
3318 3319 3320 3321 3322 3323 3324
	struct tipc_net *tn = tipc_net(net);

	if (!iter) {
		iter = kmalloc(sizeof(*iter), GFP_KERNEL);
		if (!iter)
			return -ENOMEM;

3325
		cb->args[4] = (long)iter;
C
Cong Wang 已提交
3326 3327 3328 3329 3330 3331 3332 3333
	}

	rhashtable_walk_enter(&tn->sk_rht, iter);
	return 0;
}

int tipc_dump_done(struct netlink_callback *cb)
{
3334
	struct rhashtable_iter *hti = (void *)cb->args[4];
C
Cong Wang 已提交
3335 3336 3337 3338 3339 3340 3341

	rhashtable_walk_exit(hti);
	kfree(hti);
	return 0;
}
EXPORT_SYMBOL(tipc_dump_done);

3342 3343
int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
			   struct tipc_sock *tsk, u32 sk_filter_state,
3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364
			   u64 (*tipc_diag_gen_cookie)(struct sock *sk))
{
	struct sock *sk = &tsk->sk;
	struct nlattr *attrs;
	struct nlattr *stat;

	/*filter response w.r.t sk_state*/
	if (!(sk_filter_state & (1 << sk->sk_state)))
		return 0;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto msg_cancel;

	if (__tipc_nl_add_sk_info(skb, tsk))
		goto attr_msg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_UID,
3365
			from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk),
3366
					 sock_i_uid(sk))) ||
3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378
	    nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE,
			      tipc_diag_gen_cookie(sk),
			      TIPC_NLA_SOCK_PAD))
		goto attr_msg_cancel;

	stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT);
	if (!stat)
		goto attr_msg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ,
			skb_queue_len(&sk->sk_receive_queue)) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ,
3379 3380 3381
			skb_queue_len(&sk->sk_write_queue)) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP,
			atomic_read(&sk->sk_drops)))
3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392
		goto stat_msg_cancel;

	if (tsk->cong_link_cnt &&
	    nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG))
		goto stat_msg_cancel;

	if (tsk_conn_cong(tsk) &&
	    nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG))
		goto stat_msg_cancel;

	nla_nest_end(skb, stat);
3393 3394 3395 3396 3397

	if (tsk->group)
		if (tipc_group_fill_sock_diag(tsk->group, skb))
			goto stat_msg_cancel;

3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409
	nla_nest_end(skb, attrs);

	return 0;

stat_msg_cancel:
	nla_nest_cancel(skb, stat);
attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
msg_cancel:
	return -EMSGSIZE;
}
EXPORT_SYMBOL(tipc_sk_fill_sock_diag);
3410

3411 3412
int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
3413
	return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk);
3414 3415
}

3416
/* Caller should hold socket lock for the passed tipc socket. */
3417 3418 3419
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 struct publication *publ)
3420 3421 3422 3423 3424
{
	void *hdr;
	struct nlattr *attrs;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3425
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET);
3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_PUBL);
	if (!attrs)
		goto genlmsg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
3456 3457 3458
static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
				  struct netlink_callback *cb,
				  struct tipc_sock *tsk, u32 *last_publ)
3459 3460 3461 3462 3463
{
	int err;
	struct publication *p;

	if (*last_publ) {
J
Jon Maloy 已提交
3464
		list_for_each_entry(p, &tsk->publications, binding_sock) {
3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480
			if (p->key == *last_publ)
				break;
		}
		if (p->key != *last_publ) {
			/* We never set seq or call nl_dump_check_consistent()
			 * this means that setting prev_seq here will cause the
			 * consistence check to fail in the netlink callback
			 * handler. Resulting in the last NLMSG_DONE message
			 * having the NLM_F_DUMP_INTR flag set.
			 */
			cb->prev_seq = 1;
			*last_publ = 0;
			return -EPIPE;
		}
	} else {
		p = list_first_entry(&tsk->publications, struct publication,
J
Jon Maloy 已提交
3481
				     binding_sock);
3482 3483
	}

J
Jon Maloy 已提交
3484
	list_for_each_entry_from(p, &tsk->publications, binding_sock) {
3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498
		err = __tipc_nl_add_sk_publ(skb, cb, p);
		if (err) {
			*last_publ = p->key;
			return err;
		}
	}
	*last_publ = 0;

	return 0;
}

int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
3499
	u32 tsk_portid = cb->args[0];
3500 3501
	u32 last_publ = cb->args[1];
	u32 done = cb->args[2];
3502
	struct net *net = sock_net(skb->sk);
3503 3504
	struct tipc_sock *tsk;

3505
	if (!tsk_portid) {
3506 3507 3508 3509 3510 3511 3512
		struct nlattr **attrs;
		struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];

		err = tipc_nlmsg_parse(cb->nlh, &attrs);
		if (err)
			return err;

3513 3514 3515
		if (!attrs[TIPC_NLA_SOCK])
			return -EINVAL;

3516 3517
		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
				       attrs[TIPC_NLA_SOCK],
3518
				       tipc_nl_sock_policy, NULL);
3519 3520 3521 3522 3523 3524
		if (err)
			return err;

		if (!sock[TIPC_NLA_SOCK_REF])
			return -EINVAL;

3525
		tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]);
3526 3527 3528 3529 3530
	}

	if (done)
		return 0;

3531
	tsk = tipc_sk_lookup(net, tsk_portid);
3532 3533 3534 3535 3536 3537 3538 3539
	if (!tsk)
		return -EINVAL;

	lock_sock(&tsk->sk);
	err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ);
	if (!err)
		done = 1;
	release_sock(&tsk->sk);
3540
	sock_put(&tsk->sk);
3541

3542
	cb->args[0] = tsk_portid;
3543 3544 3545 3546 3547
	cb->args[1] = last_publ;
	cb->args[2] = done;

	return skb->len;
}