socket.c 88.8 KB
Newer Older
P
Per Liden 已提交
1
/*
2
 * net/tipc/socket.c: TIPC socket API
3
 *
J
Jon Maloy 已提交
4
 * Copyright (c) 2001-2007, 2012-2017, Ericsson AB
5
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
P
Per Liden 已提交
6 7
 * All rights reserved.
 *
P
Per Liden 已提交
8
 * Redistribution and use in source and binary forms, with or without
P
Per Liden 已提交
9 10
 * modification, are permitted provided that the following conditions are met:
 *
P
Per Liden 已提交
11 12 13 14 15 16 17 18
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
P
Per Liden 已提交
19
 *
P
Per Liden 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
P
Per Liden 已提交
34 35 36
 * POSSIBILITY OF SUCH DAMAGE.
 */

37
#include <linux/rhashtable.h>
38 39
#include <linux/sched/signal.h>

P
Per Liden 已提交
40
#include "core.h"
41
#include "name_table.h"
E
Erik Hugne 已提交
42
#include "node.h"
43
#include "link.h"
44
#include "name_distr.h"
45
#include "socket.h"
46
#include "bcast.h"
47
#include "netlink.h"
J
Jon Maloy 已提交
48
#include "group.h"
49

50
#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
51
#define CONN_PROBING_INTV	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
52 53 54
#define TIPC_FWD_MSG		1
#define TIPC_MAX_PORT		0xffffffff
#define TIPC_MIN_PORT		1
55
#define TIPC_ACK_RATE		4       /* ACK at 1/4 of rcv window size */
56

57 58
enum {
	TIPC_LISTEN = TCP_LISTEN,
59
	TIPC_ESTABLISHED = TCP_ESTABLISHED,
60
	TIPC_OPEN = TCP_CLOSE,
61
	TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
62
	TIPC_CONNECTING = TCP_SYN_SENT,
63 64
};

65 66 67 68 69
struct sockaddr_pair {
	struct sockaddr_tipc sock;
	struct sockaddr_tipc member;
};

70 71 72 73 74 75 76
/**
 * struct tipc_sock - TIPC socket structure
 * @sk: socket - interacts with 'port' and with user via the socket API
 * @conn_type: TIPC type used when connection was established
 * @conn_instance: TIPC instance used when connection was established
 * @published: non-zero if port has one or more associated names
 * @max_pkt: maximum packet size "hint" used when building messages sent by port
77
 * @portid: unique port identity in TIPC socket hash table
78
 * @phdr: preformatted message header used when sending messages
79
 * @cong_links: list of congested links
80
 * @publications: list of publications for port
81
 * @blocking_link: address of the congested link we are currently sleeping on
82 83 84 85
 * @pub_count: total # of publications port has made during its lifetime
 * @probing_state:
 * @conn_timeout: the time we can wait for an unresponded setup request
 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
86
 * @cong_link_cnt: number of congested links
J
Jon Maloy 已提交
87
 * @snt_unacked: # messages sent by socket, and not yet acked by peer
88
 * @rcv_unacked: # messages read by user, but not yet acked back to peer
89
 * @peer: 'connected' peer for dgram/rdm
90
 * @node: hash table node
91
 * @mc_method: cookie for use between socket and broadcast layer
92
 * @rcu: rcu struct for tipc_sock
93 94 95 96 97 98 99
 */
struct tipc_sock {
	struct sock sk;
	u32 conn_type;
	u32 conn_instance;
	int published;
	u32 max_pkt;
100
	u32 portid;
101
	struct tipc_msg phdr;
102
	struct list_head cong_links;
103 104 105 106
	struct list_head publications;
	u32 pub_count;
	uint conn_timeout;
	atomic_t dupl_rcvcnt;
107
	bool probe_unacked;
108
	u16 cong_link_cnt;
109 110
	u16 snt_unacked;
	u16 snd_win;
111
	u16 peer_caps;
112 113
	u16 rcv_unacked;
	u16 rcv_win;
114
	struct sockaddr_tipc peer;
115
	struct rhash_head node;
116
	struct tipc_mc_method mc_method;
117
	struct rcu_head rcu;
J
Jon Maloy 已提交
118
	struct tipc_group *group;
119
	bool group_is_open;
120
};
P
Per Liden 已提交
121

J
Jon Maloy 已提交
122
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
123
static void tipc_data_ready(struct sock *sk);
124
static void tipc_write_space(struct sock *sk);
125
static void tipc_sock_destruct(struct sock *sk);
126
static int tipc_release(struct socket *sock);
127 128
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
		       bool kern);
129
static void tipc_sk_timeout(struct timer_list *t);
130
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
131
			   struct tipc_name_seq const *seq);
132
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
133
			    struct tipc_name_seq const *seq);
J
Jon Maloy 已提交
134
static int tipc_sk_leave(struct tipc_sock *tsk);
135
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
136 137
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
138
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
139
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
P
Per Liden 已提交
140

141 142 143
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
P
Per Liden 已提交
144
static struct proto tipc_proto;
145 146
static const struct rhashtable_params tsk_rht_params;

147 148 149 150 151
/* tsk_own_node - own node identity, as recorded in the preformatted header */
static u32 tsk_own_node(struct tipc_sock *tsk)
{
	struct tipc_msg *hdr = &tsk->phdr;

	return msg_prevnode(hdr);
}

152
static u32 tsk_peer_node(struct tipc_sock *tsk)
153
{
154
	return msg_destnode(&tsk->phdr);
155 156
}

157
static u32 tsk_peer_port(struct tipc_sock *tsk)
158
{
159
	return msg_destport(&tsk->phdr);
160 161
}

162
static  bool tsk_unreliable(struct tipc_sock *tsk)
163
{
164
	return msg_src_droppable(&tsk->phdr) != 0;
165 166
}

167
/* tsk_set_unreliable - allow/forbid dropping of messages sent by this socket */
static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
{
	struct tipc_msg *hdr = &tsk->phdr;

	msg_set_src_droppable(hdr, unreliable ? 1 : 0);
}

172
static bool tsk_unreturnable(struct tipc_sock *tsk)
173
{
174
	return msg_dest_droppable(&tsk->phdr) != 0;
175 176
}

177
/* tsk_set_unreturnable - allow/forbid dropping of undeliverable messages
 * instead of returning them to this socket
 */
static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
{
	struct tipc_msg *hdr = &tsk->phdr;

	msg_set_dest_droppable(hdr, unreturnable ? 1 : 0);
}

182
static int tsk_importance(struct tipc_sock *tsk)
183
{
184
	return msg_importance(&tsk->phdr);
185 186
}

187
static int tsk_set_importance(struct tipc_sock *tsk, int imp)
188 189 190
{
	if (imp > TIPC_CRITICAL_IMPORTANCE)
		return -EINVAL;
191
	msg_set_importance(&tsk->phdr, (u32)imp);
192 193
	return 0;
}
194

195 196 197 198 199
/* tipc_sk - cast a generic sock pointer to its enclosing tipc_sock */
static struct tipc_sock *tipc_sk(const struct sock *sk)
{
	return container_of(sk, struct tipc_sock, sk);
}

200
static bool tsk_conn_cong(struct tipc_sock *tsk)
201
{
202
	return tsk->snt_unacked > tsk->snd_win;
203 204
}

205 206 207 208 209
/* tsk_blocks - convert a message length in bytes to flow-control blocks,
 * rounding up (even a zero-length message costs one block)
 */
static u16 tsk_blocks(int len)
{
	return ((len / FLOWCTL_BLK_SZ) + 1);
}

210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
/* tsk_adv_blocks(): translate a buffer size in bytes to number of
 * advertisable blocks, taking into account the ratio truesize(len)/len
 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
 */
static u16 tsk_adv_blocks(int len)
{
	return len / FLOWCTL_BLK_SZ / 4;
}

/* tsk_inc(): advertised-window increment for one sent or received message
 * - With block-based flow control: number of blocks the message occupies
 * - Peer without TIPC_BLOCK_FLOWCTL capability: count whole messages instead
 */
static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
{
	if (!(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return 1;

	return (msglen / FLOWCTL_BLK_SZ) + 1;
}

230
/**
231
 * tsk_advance_rx_queue - discard first buffer in socket receive queue
232 233
 *
 * Caller must hold socket lock
P
Per Liden 已提交
234
 */
235
static void tsk_advance_rx_queue(struct sock *sk)
P
Per Liden 已提交
236
{
237
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
P
Per Liden 已提交
238 239
}

240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
/* tipc_sk_respond() : send response message back to sender
 *
 * The buffer header is reversed in place (possibly dropping the buffer
 * instead, depending on @err), then transmitted to the originator.
 */
static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
{
	struct net *net = sock_net(sk);
	u32 own_addr = tipc_own_addr(net);
	struct tipc_msg *hdr;
	u32 dnode, selector;

	if (!tipc_msg_reverse(own_addr, &skb, err))
		return;

	hdr = buf_msg(skb);
	dnode = msg_destnode(hdr);
	selector = msg_origport(hdr);
	tipc_node_xmit_skb(net, skb, dnode, selector);
}

P
Per Liden 已提交
256
/**
257
 * tsk_rej_rx_queue - reject all buffers in socket receive queue
258 259
 *
 * Caller must hold socket lock
P
Per Liden 已提交
260
 */
261
static void tsk_rej_rx_queue(struct sock *sk)
P
Per Liden 已提交
262
{
263
	struct sk_buff *skb;
264

265 266
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
		tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
P
Per Liden 已提交
267 268
}

269 270
static bool tipc_sk_connected(struct sock *sk)
{
271
	return sk->sk_state == TIPC_ESTABLISHED;
272 273
}

274 275 276 277 278 279 280 281 282 283
/* tipc_sk_type_connectionless - check if the socket is datagram socket
 * @sk: socket
 *
 * Returns true if connectionless (RDM or DGRAM), false otherwise
 */
static bool tipc_sk_type_connectionless(struct sock *sk)
{
	int typ = sk->sk_type;

	return typ == SOCK_RDM || typ == SOCK_DGRAM;
}

284
/* tsk_peer_msg - verify if message was sent by connected port's peer
J
Jon Paul Maloy 已提交
285 286 287 288
 *
 * Handles cases where the node's network address has changed from
 * the default of <0.0.0> to its configured setting.
 */
289
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
J
Jon Paul Maloy 已提交
290
{
291
	struct sock *sk = &tsk->sk;
292
	u32 self = tipc_own_addr(sock_net(sk));
293
	u32 peer_port = tsk_peer_port(tsk);
294
	u32 orig_node, peer_node;
J
Jon Paul Maloy 已提交
295

296
	if (unlikely(!tipc_sk_connected(sk)))
J
Jon Paul Maloy 已提交
297 298 299 300 301 302
		return false;

	if (unlikely(msg_origport(msg) != peer_port))
		return false;

	orig_node = msg_orignode(msg);
303
	peer_node = tsk_peer_node(tsk);
J
Jon Paul Maloy 已提交
304 305 306 307

	if (likely(orig_node == peer_node))
		return true;

308
	if (!orig_node && peer_node == self)
J
Jon Paul Maloy 已提交
309 310
		return true;

311
	if (!peer_node && orig_node == self)
J
Jon Paul Maloy 已提交
312 313 314 315 316
		return true;

	return false;
}

317 318 319 320 321 322 323 324 325
/* tipc_set_sk_state - set the sk_state of the socket
 * @sk: socket
 *
 * Caller must hold socket lock
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_set_sk_state(struct sock *sk, int state)
{
326
	int oldsk_state = sk->sk_state;
327 328 329
	int res = -EINVAL;

	switch (state) {
330 331 332
	case TIPC_OPEN:
		res = 0;
		break;
333
	case TIPC_LISTEN:
334
	case TIPC_CONNECTING:
335
		if (oldsk_state == TIPC_OPEN)
336 337
			res = 0;
		break;
338
	case TIPC_ESTABLISHED:
339
		if (oldsk_state == TIPC_CONNECTING ||
340
		    oldsk_state == TIPC_OPEN)
341 342
			res = 0;
		break;
343
	case TIPC_DISCONNECTING:
344
		if (oldsk_state == TIPC_CONNECTING ||
345 346 347
		    oldsk_state == TIPC_ESTABLISHED)
			res = 0;
		break;
348 349 350 351 352 353 354 355
	}

	if (!res)
		sk->sk_state = state;

	return res;
}

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
/* tipc_sk_sock_err - map current socket condition to an error code
 *
 * Returns 0 if it is still valid to keep waiting, otherwise a negative
 * errno: a pending socket error, -EPIPE/-ENOTCONN for dead
 * connection-oriented sockets, -EAGAIN on expired timeout, or a
 * signal-derived errno when a signal is pending.
 */
static int tipc_sk_sock_err(struct socket *sock, long *timeout)
{
	struct sock *sk = sock->sk;
	int err = sock_error(sk);

	if (err)
		return err;

	if (sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET) {
		if (sk->sk_state == TIPC_DISCONNECTING)
			return -EPIPE;
		if (!tipc_sk_connected(sk))
			return -ENOTCONN;
	}

	if (!*timeout)
		return -EAGAIN;
	if (signal_pending(current))
		return sock_intr_errno(*timeout);

	return 0;
}

378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
/* tipc_wait_for_cond - sleep (dropping the socket lock while asleep) until
 * @condition_ holds, a socket error occurs, @timeo_ expires, or a signal
 * arrives. Statement-expression macro; evaluates to 0 when the condition
 * became true, otherwise a negative errno from tipc_sk_sock_err().
 * NOTE(review): *@timeo_ is updated in place by wait_woken().
 */
#define tipc_wait_for_cond(sock_, timeo_, condition_)			       \
({                                                                             \
	struct sock *sk_;						       \
	int rc_;							       \
									       \
	while ((rc_ = !(condition_))) {					       \
		DEFINE_WAIT_FUNC(wait_, woken_wake_function);	               \
		sk_ = (sock_)->sk;					       \
		rc_ = tipc_sk_sock_err((sock_), timeo_);		       \
		if (rc_)						       \
			break;						       \
		prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE);    \
		release_sock(sk_);					       \
		*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
		sched_annotate_sleep();				               \
		lock_sock(sk_);						       \
		remove_wait_queue(sk_sleep(sk_), &wait_);		       \
	}								       \
	rc_;								       \
})

P
Per Liden 已提交
399
/**
400
 * tipc_sk_create - create a TIPC socket
401
 * @net: network namespace (must be default network)
P
Per Liden 已提交
402 403
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
404
 * @kern: caused by kernel or by userspace?
405
 *
406 407
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
P
Per Liden 已提交
408 409 410
 *
 * Returns 0 on success, errno otherwise
 */
411 412
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
P
Per Liden 已提交
413
{
414
	struct tipc_net *tn;
415
	const struct proto_ops *ops;
P
Per Liden 已提交
416
	struct sock *sk;
417
	struct tipc_sock *tsk;
418
	struct tipc_msg *msg;
419 420

	/* Validate arguments */
P
Per Liden 已提交
421 422 423 424 425
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	switch (sock->type) {
	case SOCK_STREAM:
426
		ops = &stream_ops;
P
Per Liden 已提交
427 428
		break;
	case SOCK_SEQPACKET:
429
		ops = &packet_ops;
P
Per Liden 已提交
430 431 432
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
433
		ops = &msg_ops;
P
Per Liden 已提交
434
		break;
435 436
	default:
		return -EPROTOTYPE;
P
Per Liden 已提交
437 438
	}

439
	/* Allocate socket's protocol area */
440
	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
441
	if (sk == NULL)
P
Per Liden 已提交
442 443
		return -ENOMEM;

444
	tsk = tipc_sk(sk);
445 446
	tsk->max_pkt = MAX_PKT_DEFAULT;
	INIT_LIST_HEAD(&tsk->publications);
447
	INIT_LIST_HEAD(&tsk->cong_links);
448
	msg = &tsk->phdr;
449
	tn = net_generic(sock_net(sk), tipc_net_id);
P
Per Liden 已提交
450

451 452 453
	/* Finish initializing socket data structures */
	sock->ops = ops;
	sock_init_data(sock, sk);
454
	tipc_set_sk_state(sk, TIPC_OPEN);
455
	if (tipc_sk_insert(tsk)) {
M
Masanari Iida 已提交
456
		pr_warn("Socket create failed; port number exhausted\n");
457 458
		return -EINVAL;
	}
459 460 461 462

	/* Ensure tsk is visible before we read own_addr. */
	smp_mb();

463 464
	tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE,
		      TIPC_NAMED_MSG, NAMED_H_SIZE, 0);
465

466
	msg_set_origport(msg, tsk->portid);
467
	timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
468
	sk->sk_shutdown = 0;
J
Jon Maloy 已提交
469
	sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
470
	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
471 472
	sk->sk_data_ready = tipc_data_ready;
	sk->sk_write_space = tipc_write_space;
473
	sk->sk_destruct = tipc_sock_destruct;
474
	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
475
	tsk->group_is_open = true;
476
	atomic_set(&tsk->dupl_rcvcnt, 0);
477

478 479 480 481
	/* Start out with safe limits until we receive an advertised window */
	tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
	tsk->rcv_win = tsk->snd_win;

482
	if (tipc_sk_type_connectionless(sk)) {
483
		tsk_set_unreturnable(tsk, true);
484
		if (sock->type == SOCK_DGRAM)
485
			tsk_set_unreliable(tsk, true);
486
	}
487

P
Per Liden 已提交
488 489 490
	return 0;
}

491 492 493 494 495 496 497
/* tipc_sk_callback - RCU grace-period callback: drop the final socket ref */
static void tipc_sk_callback(struct rcu_head *head)
{
	struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);

	sock_put(&tsk->sk);
}

498 499 500 501 502 503
/* Caller should hold socket lock for the socket. */
static void __tipc_shutdown(struct socket *sock, int error)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
504
	long timeout = CONN_TIMEOUT_DEFAULT;
505 506 507
	u32 dnode = tsk_peer_node(tsk);
	struct sk_buff *skb;

508 509 510 511
	/* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */
	tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
					    !tsk_conn_cong(tsk)));

512 513 514 515 516 517
	/* Reject all unreceived messages, except on an active connection
	 * (which disconnects locally & sends a 'FIN+' to peer).
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (TIPC_SKB_CB(skb)->bytes_read) {
			kfree_skb(skb);
518
			continue;
519
		}
520 521 522 523 524 525
		if (!tipc_sk_type_connectionless(sk) &&
		    sk->sk_state != TIPC_DISCONNECTING) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			tipc_node_remove_conn(net, dnode, tsk->portid);
		}
		tipc_sk_respond(sk, skb, error);
526
	}
527 528 529 530

	if (tipc_sk_type_connectionless(sk))
		return;

531 532 533 534 535 536 537
	if (sk->sk_state != TIPC_DISCONNECTING) {
		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
				      TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
				      tsk_own_node(tsk), tsk_peer_port(tsk),
				      tsk->portid, error);
		if (skb)
			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
538 539
		tipc_node_remove_conn(net, dnode, tsk->portid);
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
540 541 542
	}
}

P
Per Liden 已提交
543
/**
544
 * tipc_release - destroy a TIPC socket
P
Per Liden 已提交
545 546 547 548 549 550 551
 * @sock: socket to destroy
 *
 * This routine cleans up any messages that are still queued on the socket.
 * For DGRAM and RDM socket types, all queued messages are rejected.
 * For SEQPACKET and STREAM socket types, the first message is rejected
 * and any others are discarded.  (If the first message on a STREAM socket
 * is partially-read, it is discarded and the next one is rejected instead.)
552
 *
P
Per Liden 已提交
553 554 555 556 557 558
 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 * are returned or discarded according to the "destination droppable" setting
 * specified for the message by the sender.
 *
 * Returns 0 on success, errno otherwise
 */
559
static int tipc_release(struct socket *sock)
P
Per Liden 已提交
560 561
{
	struct sock *sk = sock->sk;
562
	struct tipc_sock *tsk;
P
Per Liden 已提交
563

564 565 566 567 568
	/*
	 * Exit if socket isn't fully initialized (occurs when a failed accept()
	 * releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
P
Per Liden 已提交
569
		return 0;
570

571
	tsk = tipc_sk(sk);
572 573
	lock_sock(sk);

574 575
	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
	sk->sk_shutdown = SHUTDOWN_MASK;
J
Jon Maloy 已提交
576
	tipc_sk_leave(tsk);
577
	tipc_sk_withdraw(tsk, 0, NULL);
578
	sk_stop_timer(sk, &sk->sk_timer);
579
	tipc_sk_remove(tsk);
P
Per Liden 已提交
580

581 582
	/* Reject any messages that accumulated in backlog queue */
	release_sock(sk);
J
Jon Maloy 已提交
583
	tipc_dest_list_purge(&tsk->cong_links);
584
	tsk->cong_link_cnt = 0;
585
	call_rcu(&tsk->rcu, tipc_sk_callback);
586
	sock->sk = NULL;
P
Per Liden 已提交
587

588
	return 0;
P
Per Liden 已提交
589 590 591
}

/**
592
 * tipc_bind - associate or disassocate TIPC name(s) with a socket
P
Per Liden 已提交
593 594 595
 * @sock: socket structure
 * @uaddr: socket address describing name(s) and desired operation
 * @uaddr_len: size of socket address data structure
596
 *
P
Per Liden 已提交
597 598 599
 * Name and name sequence binding is indicated using a positive scope value;
 * a negative scope value unbinds the specified name.  Specifying no name
 * (i.e. a socket address length of 0) unbinds all names from the socket.
600
 *
P
Per Liden 已提交
601
 * Returns 0 on success, errno otherwise
602 603 604
 *
 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 *       access any non-constant socket information.
P
Per Liden 已提交
605
 */
606 607
static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
		     int uaddr_len)
P
Per Liden 已提交
608
{
609
	struct sock *sk = sock->sk;
P
Per Liden 已提交
610
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
611
	struct tipc_sock *tsk = tipc_sk(sk);
612
	int res = -EINVAL;
P
Per Liden 已提交
613

614 615
	lock_sock(sk);
	if (unlikely(!uaddr_len)) {
616
		res = tipc_sk_withdraw(tsk, 0, NULL);
617 618
		goto exit;
	}
J
Jon Maloy 已提交
619 620 621 622
	if (tsk->group) {
		res = -EACCES;
		goto exit;
	}
623 624 625 626 627 628 629 630
	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
		res = -EINVAL;
		goto exit;
	}
	if (addr->family != AF_TIPC) {
		res = -EAFNOSUPPORT;
		goto exit;
	}
P
Per Liden 已提交
631 632 633

	if (addr->addrtype == TIPC_ADDR_NAME)
		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
634 635 636 637
	else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
		res = -EAFNOSUPPORT;
		goto exit;
	}
638

639
	if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
640
	    (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
641 642 643 644
	    (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
		res = -EACCES;
		goto exit;
	}
645

J
Jon Maloy 已提交
646
	res = (addr->scope >= 0) ?
647 648
		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
649 650 651
exit:
	release_sock(sk);
	return res;
P
Per Liden 已提交
652 653
}

654
/**
655
 * tipc_getname - get port ID of socket or peer socket
P
Per Liden 已提交
656 657 658
 * @sock: socket structure
 * @uaddr: area for returned socket address
 * @uaddr_len: area for returned length of socket address
659
 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
660
 *
P
Per Liden 已提交
661
 * Returns 0 on success, errno otherwise
662
 *
663 664
 * NOTE: This routine doesn't need to take the socket lock since it only
 *       accesses socket information that is unchanging (or which changes in
665
 *       a completely predictable manner).
P
Per Liden 已提交
666
 */
667
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
668
			int peer)
P
Per Liden 已提交
669 670
{
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
671 672
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
P
Per Liden 已提交
673

674
	memset(addr, 0, sizeof(*addr));
675
	if (peer) {
676
		if ((!tipc_sk_connected(sk)) &&
677
		    ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
678
			return -ENOTCONN;
679 680
		addr->addr.id.ref = tsk_peer_port(tsk);
		addr->addr.id.node = tsk_peer_node(tsk);
681
	} else {
682
		addr->addr.id.ref = tsk->portid;
683
		addr->addr.id.node = tipc_own_addr(sock_net(sk));
684
	}
P
Per Liden 已提交
685 686 687 688 689 690

	addr->addrtype = TIPC_ADDR_ID;
	addr->family = AF_TIPC;
	addr->scope = 0;
	addr->addr.name.domain = 0;

691
	return sizeof(*addr);
P
Per Liden 已提交
692 693 694
}

/**
695
 * tipc_poll - read and possibly block on pollmask
P
Per Liden 已提交
696 697
 * @file: file structure associated with the socket
 * @sock: socket for which to calculate the poll bits
698
 * @wait: ???
P
Per Liden 已提交
699
 *
700 701 702 703 704 705 706 707
 * Returns pollmask value
 *
 * COMMENTARY:
 * It appears that the usual socket locking mechanisms are not useful here
 * since the pollmask info is potentially out-of-date the moment this routine
 * exits.  TCP and other protocols seem to rely on higher level poll routines
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
708 709 710
 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 * imply that the operation will succeed, merely that it should be performed
 * and will not block.
P
Per Liden 已提交
711
 */
712 713
static __poll_t tipc_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
P
Per Liden 已提交
714
{
715
	struct sock *sk = sock->sk;
716
	struct tipc_sock *tsk = tipc_sk(sk);
A
Al Viro 已提交
717
	__poll_t revents = 0;
718

719 720
	sock_poll_wait(file, sk_sleep(sk), wait);

721
	if (sk->sk_shutdown & RCV_SHUTDOWN)
722
		revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
723
	if (sk->sk_shutdown == SHUTDOWN_MASK)
724
		revents |= EPOLLHUP;
725

726 727
	switch (sk->sk_state) {
	case TIPC_ESTABLISHED:
728
	case TIPC_CONNECTING:
729
		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
730
			revents |= EPOLLOUT;
731 732
		/* fall thru' */
	case TIPC_LISTEN:
J
Jon Maloy 已提交
733
		if (!skb_queue_empty(&sk->sk_receive_queue))
734
			revents |= EPOLLIN | EPOLLRDNORM;
735 736
		break;
	case TIPC_OPEN:
737
		if (tsk->group_is_open && !tsk->cong_link_cnt)
738
			revents |= EPOLLOUT;
739 740
		if (!tipc_sk_type_connectionless(sk))
			break;
J
Jon Maloy 已提交
741
		if (skb_queue_empty(&sk->sk_receive_queue))
742
			break;
743
		revents |= EPOLLIN | EPOLLRDNORM;
744 745
		break;
	case TIPC_DISCONNECTING:
746
		revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP;
747
		break;
748
	}
749
	return revents;
P
Per Liden 已提交
750 751
}

752 753 754 755
/**
 * tipc_sendmcast - send multicast message
 * @sock: socket structure
 * @seq: destination address
 * @msg: message to send
 * @dlen: length of data to send
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
			  struct msghdr *msg, size_t dlen, long timeout)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_mc_method *method = &tsk->mc_method;
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	int mtu = tipc_bcast_get_mtu(net);
	struct sk_buff_head pkts;
	struct tipc_nlist dsts;
	int rc;

	/* Group members may not send regular multicast */
	if (tsk->group)
		return -EACCES;

	/* Block or return if any destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;

	/* Lookup destination nodes */
	tipc_nlist_init(&dsts, tipc_own_addr(net));
	tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
				      seq->upper, &dsts);
	if (!dsts.local && !dsts.remote)
		return -EHOSTUNREACH;

	/* Build message header */
	msg_set_type(hdr, TIPC_MCAST_MSG);
	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_nametype(hdr, seq->type);
	msg_set_namelower(hdr, seq->lower);
	msg_set_nameupper(hdr, seq->upper);

	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);

	/* Send message if build was successful */
	if (unlikely(rc == dlen))
		rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
				     &tsk->cong_link_cnt);

	tipc_nlist_purge(&dsts);

	return rc ? rc : dlen;
}

815 816 817 818 819 820 821 822 823 824 825 826 827
/**
 * tipc_send_group_msg - send a message to a member in the group
 * @net: network namespace
 * @m: message to send
 * @mb: group member
 * @dnode: destination node
 * @dport: destination port
 * @dlen: total length of message data
 *
 * Returns @dlen on success, or errno
 */
static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
			       struct msghdr *m, struct tipc_member *mb,
			       u32 dnode, u32 dport, int dlen)
{
	u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
	struct tipc_mc_method *method = &tsk->mc_method;
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_msg *hdr = &tsk->phdr;
	struct sk_buff_head pkts;
	int mtu, rc;

	/* Complete message header */
	msg_set_type(hdr, TIPC_GRP_UCAST_MSG);
	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
	msg_set_destport(hdr, dport);
	msg_set_destnode(hdr, dnode);
	msg_set_grp_bc_seqno(hdr, bc_snd_nxt);

	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	/* Send message */
	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
		tipc_dest_push(&tsk->cong_links, dnode, 0);
		tsk->cong_link_cnt++;
	}

	/* Update send window */
	tipc_group_update_member(mb, blks);

	/* A broadcast sent within next EXPIRE period must follow same path */
	method->rcast = true;
	method->mandatory = true;
	return dlen;
}

/**
 * tipc_send_group_unicast - send message to a member in the group
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
				   int dlen, long timeout)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct net *net = sock_net(sk);
	struct tipc_member *mb = NULL;
	u32 node, port;
	int rc;

	node = dest->addr.id.node;
	port = dest->addr.id.ref;
	if (!port && !node)
		return -EHOSTUNREACH;

	/* Block or return if destination link or member is congested */
	rc = tipc_wait_for_cond(sock, &timeout,
				!tipc_dest_find(&tsk->cong_links, node, 0) &&
				!tipc_group_cong(grp, node, port, blks, &mb));
	if (unlikely(rc))
		return rc;

	/* Destination may have left the group while we waited */
	if (unlikely(!mb))
		return -EHOSTUNREACH;

	rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen);

	return rc ? rc : dlen;
}

908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926
/**
 * tipc_send_group_anycast - send message to any member with given identity
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
				   int dlen, long timeout)
{
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct list_head *cong_links = &tsk->cong_links;
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_group *grp = tsk->group;
927
	struct tipc_msg *hdr = &tsk->phdr;
928 929 930 931 932
	struct tipc_member *first = NULL;
	struct tipc_member *mbr = NULL;
	struct net *net = sock_net(sk);
	u32 node, port, exclude;
	struct list_head dsts;
933
	u32 type, inst, scope;
934 935 936 937 938 939
	int lookups = 0;
	int dstcnt, rc;
	bool cong;

	INIT_LIST_HEAD(&dsts);

940
	type = msg_nametype(hdr);
941
	inst = dest->addr.name.name.instance;
942
	scope = msg_lookup_scope(hdr);
943 944 945 946 947 948 949
	exclude = tipc_group_exclude(grp);

	while (++lookups < 4) {
		first = NULL;

		/* Look for a non-congested destination member, if any */
		while (1) {
950
			if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990
						 &dstcnt, exclude, false))
				return -EHOSTUNREACH;
			tipc_dest_pop(&dsts, &node, &port);
			cong = tipc_group_cong(grp, node, port, blks, &mbr);
			if (!cong)
				break;
			if (mbr == first)
				break;
			if (!first)
				first = mbr;
		}

		/* Start over if destination was not in member list */
		if (unlikely(!mbr))
			continue;

		if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
			break;

		/* Block or return if destination link or member is congested */
		rc = tipc_wait_for_cond(sock, &timeout,
					!tipc_dest_find(cong_links, node, 0) &&
					!tipc_group_cong(grp, node, port,
							 blks, &mbr));
		if (unlikely(rc))
			return rc;

		/* Send, unless destination disappeared while waiting */
		if (likely(mbr))
			break;
	}

	if (unlikely(lookups >= 4))
		return -EHOSTUNREACH;

	rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);

	return rc ? rc : dlen;
}

J
Jon Maloy 已提交
991 992 993 994 995 996 997 998 999 1000 1001 1002 1003
/**
 * tipc_send_group_bcast - send message to all members in communication group
 * @sk: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
				 int dlen, long timeout)
{
1004
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
J
Jon Maloy 已提交
1005 1006 1007 1008 1009 1010
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_nlist *dsts = tipc_group_dests(grp);
	struct tipc_mc_method *method = &tsk->mc_method;
1011
	bool ack = method->mandatory && method->rcast;
1012
	int blks = tsk_blocks(MCAST_H_SIZE + dlen);
J
Jon Maloy 已提交
1013 1014 1015 1016 1017 1018 1019 1020
	struct tipc_msg *hdr = &tsk->phdr;
	int mtu = tipc_bcast_get_mtu(net);
	struct sk_buff_head pkts;
	int rc = -EHOSTUNREACH;

	if (!dsts->local && !dsts->remote)
		return -EHOSTUNREACH;

1021 1022 1023
	/* Block or return if any destination link or member is congested */
	rc = tipc_wait_for_cond(sock, &timeout,	!tsk->cong_link_cnt &&
				!tipc_group_bc_cong(grp, blks));
J
Jon Maloy 已提交
1024 1025 1026 1027
	if (unlikely(rc))
		return rc;

	/* Complete message header */
1028 1029 1030 1031 1032 1033 1034
	if (dest) {
		msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
		msg_set_nameinst(hdr, dest->addr.name.name.instance);
	} else {
		msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
		msg_set_nameinst(hdr, 0);
	}
1035
	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
J
Jon Maloy 已提交
1036 1037 1038 1039
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));

1040 1041 1042
	/* Avoid getting stuck with repeated forced replicasts */
	msg_set_grp_bc_ack_req(hdr, ack);

J
Jon Maloy 已提交
1043 1044 1045 1046 1047 1048 1049
	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	/* Send message */
1050
	rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
J
Jon Maloy 已提交
1051 1052 1053
	if (unlikely(rc))
		return rc;

1054
	/* Update broadcast sequence number and send windows */
1055 1056 1057 1058 1059 1060
	tipc_group_update_bc_members(tsk->group, blks, ack);

	/* Broadcast link is now free to choose method for next broadcast */
	method->mandatory = false;
	method->expires = jiffies;

J
Jon Maloy 已提交
1061 1062 1063
	return dlen;
}

1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080
/**
 * tipc_send_group_mcast - send message to all members with given identity
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
				 int dlen, long timeout)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
1081
	struct tipc_msg *hdr = &tsk->phdr;
1082
	struct net *net = sock_net(sk);
1083
	u32 type, inst, scope, exclude;
1084
	struct list_head dsts;
1085
	u32 dstcnt;
1086 1087 1088

	INIT_LIST_HEAD(&dsts);

1089 1090 1091
	type = msg_nametype(hdr);
	inst = dest->addr.name.name.instance;
	scope = msg_lookup_scope(hdr);
1092
	exclude = tipc_group_exclude(grp);
1093 1094 1095

	if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
				 &dstcnt, exclude, true))
1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106
		return -EHOSTUNREACH;

	if (dstcnt == 1) {
		tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
		return tipc_send_group_unicast(sock, m, dlen, timeout);
	}

	tipc_dest_list_purge(&dsts);
	return tipc_send_group_bcast(sock, m, dlen, timeout);
}

1107 1108 1109 1110 1111 1112
/**
 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
 * @arrvq: queue with arriving messages, to be cloned after destination lookup
 * @inputq: queue with cloned messages, delivered to socket after dest lookup
 *
 * Multi-threaded: parallel calls with reference to same queues may occur
1113
 */
1114 1115
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
		       struct sk_buff_head *inputq)
1116
{
J
Jon Maloy 已提交
1117
	u32 self = tipc_own_addr(net);
1118
	u32 type, lower, upper, scope;
1119
	struct sk_buff *skb, *_skb;
J
Jon Maloy 已提交
1120
	u32 portid, oport, onode;
1121
	struct sk_buff_head tmpq;
J
Jon Maloy 已提交
1122
	struct list_head dports;
1123 1124 1125
	struct tipc_msg *hdr;
	int user, mtyp, hlen;
	bool exact;
1126

1127
	__skb_queue_head_init(&tmpq);
1128
	INIT_LIST_HEAD(&dports);
1129

1130 1131
	skb = tipc_skb_peek(arrvq, &inputq->lock);
	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
1132 1133 1134 1135 1136 1137 1138 1139
		hdr = buf_msg(skb);
		user = msg_user(hdr);
		mtyp = msg_type(hdr);
		hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
		oport = msg_origport(hdr);
		onode = msg_orignode(hdr);
		type = msg_nametype(hdr);

1140 1141 1142 1143 1144 1145
		if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
			spin_lock_bh(&inputq->lock);
			if (skb_peek(arrvq) == skb) {
				__skb_dequeue(arrvq);
				__skb_queue_tail(inputq, skb);
			}
J
Jon Maloy 已提交
1146
			kfree_skb(skb);
1147 1148 1149
			spin_unlock_bh(&inputq->lock);
			continue;
		}
1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165

		/* Group messages require exact scope match */
		if (msg_in_group(hdr)) {
			lower = 0;
			upper = ~0;
			scope = msg_lookup_scope(hdr);
			exact = true;
		} else {
			/* TIPC_NODE_SCOPE means "any scope" in this context */
			if (onode == self)
				scope = TIPC_NODE_SCOPE;
			else
				scope = TIPC_CLUSTER_SCOPE;
			exact = false;
			lower = msg_namelower(hdr);
			upper = msg_nameupper(hdr);
J
Jon Maloy 已提交
1166
		}
1167 1168 1169 1170 1171 1172

		/* Create destination port list: */
		tipc_nametbl_mc_lookup(net, type, lower, upper,
				       scope, exact, &dports);

		/* Clone message per destination */
J
Jon Maloy 已提交
1173
		while (tipc_dest_pop(&dports, NULL, &portid)) {
1174
			_skb = __pskb_copy(skb, hlen, GFP_ATOMIC);
1175 1176 1177 1178 1179 1180
			if (_skb) {
				msg_set_destport(buf_msg(_skb), portid);
				__skb_queue_tail(&tmpq, _skb);
				continue;
			}
			pr_warn("Failed to clone mcast rcv buffer\n");
1181
		}
1182 1183 1184 1185 1186 1187 1188 1189 1190
		/* Append to inputq if not already done by other thread */
		spin_lock_bh(&inputq->lock);
		if (skb_peek(arrvq) == skb) {
			skb_queue_splice_tail_init(&tmpq, inputq);
			kfree_skb(__skb_dequeue(arrvq));
		}
		spin_unlock_bh(&inputq->lock);
		__skb_queue_purge(&tmpq);
		kfree_skb(skb);
1191
	}
1192
	tipc_sk_rcv(net, inputq);
1193 1194
}

1195
/**
J
Jon Maloy 已提交
1196
 * tipc_sk_conn_proto_rcv - receive a connection mng protocol message
1197
 * @tsk: receiving socket
1198
 * @skb: pointer to message buffer.
1199
 */
J
Jon Maloy 已提交
1200 1201
static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
				   struct sk_buff_head *xmitq)
1202
{
1203
	struct tipc_msg *hdr = buf_msg(skb);
J
Jon Maloy 已提交
1204 1205
	u32 onode = tsk_own_node(tsk);
	struct sock *sk = &tsk->sk;
1206
	int mtyp = msg_type(hdr);
1207
	bool conn_cong;
1208

1209
	/* Ignore if connection cannot be validated: */
1210
	if (!tsk_peer_msg(tsk, hdr))
1211 1212
		goto exit;

1213 1214 1215 1216 1217 1218 1219 1220
	if (unlikely(msg_errcode(hdr))) {
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
		tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
				      tsk_peer_port(tsk));
		sk->sk_state_change(sk);
		goto exit;
	}

1221
	tsk->probe_unacked = false;
1222

1223 1224
	if (mtyp == CONN_PROBE) {
		msg_set_type(hdr, CONN_PROBE_REPLY);
J
Jon Paul Maloy 已提交
1225 1226
		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
			__skb_queue_tail(xmitq, skb);
1227 1228
		return;
	} else if (mtyp == CONN_ACK) {
1229
		conn_cong = tsk_conn_cong(tsk);
1230 1231 1232
		tsk->snt_unacked -= msg_conn_ack(hdr);
		if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
			tsk->snd_win = msg_adv_win(hdr);
1233
		if (conn_cong)
1234 1235 1236
			sk->sk_write_space(sk);
	} else if (mtyp != CONN_PROBE_REPLY) {
		pr_warn("Received unknown CONN_PROTO msg\n");
1237 1238
	}
exit:
1239
	kfree_skb(skb);
1240 1241
}

P
Per Liden 已提交
1242
/**
1243
 * tipc_sendmsg - send message in connectionless manner
P
Per Liden 已提交
1244 1245
 * @sock: socket structure
 * @m: message to send
1246
 * @dsz: amount of user data to be sent
1247
 *
P
Per Liden 已提交
1248
 * Message must have an destination specified explicitly.
1249
 * Used for SOCK_RDM and SOCK_DGRAM messages,
P
Per Liden 已提交
1250 1251
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
1252
 *
P
Per Liden 已提交
1253 1254
 * Returns the number of bytes sent on success, or errno otherwise
 */
1255
static int tipc_sendmsg(struct socket *sock,
1256
			struct msghdr *m, size_t dsz)
1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_sendmsg(sock, m, dsz);
	release_sock(sk);

	return ret;
}

1268
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
P
Per Liden 已提交
1269
{
1270
	struct sock *sk = sock->sk;
1271
	struct net *net = sock_net(sk);
1272 1273 1274 1275 1276
	struct tipc_sock *tsk = tipc_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct list_head *clinks = &tsk->cong_links;
	bool syn = !tipc_sk_type_connectionless(sk);
J
Jon Maloy 已提交
1277
	struct tipc_group *grp = tsk->group;
1278
	struct tipc_msg *hdr = &tsk->phdr;
1279
	struct tipc_name_seq *seq;
1280
	struct sk_buff_head pkts;
1281
	u32 dport, dnode = 0;
J
Jon Maloy 已提交
1282
	u32 type, inst;
1283
	int mtu, rc;
P
Per Liden 已提交
1284

1285
	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
1286
		return -EMSGSIZE;
1287

1288 1289 1290 1291 1292 1293 1294 1295 1296 1297
	if (likely(dest)) {
		if (unlikely(m->msg_namelen < sizeof(*dest)))
			return -EINVAL;
		if (unlikely(dest->family != AF_TIPC))
			return -EINVAL;
	}

	if (grp) {
		if (!dest)
			return tipc_send_group_bcast(sock, m, dlen, timeout);
1298 1299
		if (dest->addrtype == TIPC_ADDR_NAME)
			return tipc_send_group_anycast(sock, m, dlen, timeout);
1300 1301
		if (dest->addrtype == TIPC_ADDR_ID)
			return tipc_send_group_unicast(sock, m, dlen, timeout);
1302 1303
		if (dest->addrtype == TIPC_ADDR_MCAST)
			return tipc_send_group_mcast(sock, m, dlen, timeout);
1304 1305
		return -EINVAL;
	}
J
Jon Maloy 已提交
1306

1307
	if (unlikely(!dest)) {
1308 1309
		dest = &tsk->peer;
		if (!syn || dest->family != AF_TIPC)
1310 1311
			return -EDESTADDRREQ;
	}
1312 1313

	if (unlikely(syn)) {
1314
		if (sk->sk_state == TIPC_LISTEN)
1315
			return -EPIPE;
1316
		if (sk->sk_state != TIPC_OPEN)
1317 1318 1319
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
1320
		if (dest->addrtype == TIPC_ADDR_NAME) {
1321 1322
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
1323
		}
P
Per Liden 已提交
1324
	}
1325

1326 1327 1328
	seq = &dest->addr.nameseq;
	if (dest->addrtype == TIPC_ADDR_MCAST)
		return tipc_sendmcast(sock, seq, m, dlen, timeout);
1329

1330 1331 1332
	if (dest->addrtype == TIPC_ADDR_NAME) {
		type = dest->addr.name.name.type;
		inst = dest->addr.name.name.instance;
J
Jon Maloy 已提交
1333
		dnode = dest->addr.name.domain;
1334 1335 1336 1337
		msg_set_type(hdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
		msg_set_nametype(hdr, type);
		msg_set_nameinst(hdr, inst);
J
Jon Maloy 已提交
1338
		msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
1339
		dport = tipc_nametbl_translate(net, type, inst, &dnode);
1340 1341
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dport);
1342 1343
		if (unlikely(!dport && !dnode))
			return -EHOSTUNREACH;
1344 1345
	} else if (dest->addrtype == TIPC_ADDR_ID) {
		dnode = dest->addr.id.node;
1346 1347 1348 1349 1350
		msg_set_type(hdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(hdr, 0);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dest->addr.id.ref);
		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
1351 1352
	} else {
		return -EINVAL;
1353 1354
	}

1355
	/* Block or return if destination link is congested */
J
Jon Maloy 已提交
1356 1357
	rc = tipc_wait_for_cond(sock, &timeout,
				!tipc_dest_find(clinks, dnode, 0));
1358 1359 1360 1361
	if (unlikely(rc))
		return rc;

	skb_queue_head_init(&pkts);
1362
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
1363 1364
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
1365
		return rc;
1366

1367 1368
	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
J
Jon Maloy 已提交
1369
		tipc_dest_push(clinks, dnode, 0);
1370 1371 1372
		tsk->cong_link_cnt++;
		rc = 0;
	}
1373

1374 1375 1376 1377
	if (unlikely(syn && !rc))
		tipc_set_sk_state(sk, TIPC_CONNECTING);

	return rc ? rc : dlen;
P
Per Liden 已提交
1378 1379
}

1380
/**
1381
 * tipc_sendstream - send stream-oriented data
P
Per Liden 已提交
1382
 * @sock: socket structure
1383 1384
 * @m: data to send
 * @dsz: total length of data to be transmitted
1385
 *
1386
 * Used for SOCK_STREAM data.
1387
 *
1388 1389
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
P
Per Liden 已提交
1390
 */
1391
static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
1392 1393 1394 1395 1396
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
1397
	ret = __tipc_sendstream(sock, m, dsz);
1398 1399 1400 1401 1402
	release_sock(sk);

	return ret;
}

1403
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
P
Per Liden 已提交
1404
{
1405
	struct sock *sk = sock->sk;
1406
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
1407 1408 1409 1410 1411 1412 1413 1414
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	struct sk_buff_head pkts;
	u32 dnode = tsk_peer_node(tsk);
	int send, sent = 0;
	int rc = 0;
1415

1416
	skb_queue_head_init(&pkts);
1417

1418 1419
	if (unlikely(dlen > INT_MAX))
		return -EMSGSIZE;
1420

1421 1422 1423 1424 1425
	/* Handle implicit connection setup */
	if (unlikely(dest)) {
		rc = __tipc_sendmsg(sock, m, dlen);
		if (dlen && (dlen == rc))
			tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
1426
		return rc;
1427
	}
1428

1429
	do {
1430 1431
		rc = tipc_wait_for_cond(sock, &timeout,
					(!tsk->cong_link_cnt &&
1432 1433
					 !tsk_conn_cong(tsk) &&
					 tipc_sk_connected(sk)));
1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451
		if (unlikely(rc))
			break;

		send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
		rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
		if (unlikely(rc != send))
			break;

		rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
		if (unlikely(rc == -ELINKCONG)) {
			tsk->cong_link_cnt = 1;
			rc = 0;
		}
		if (likely(!rc)) {
			tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
			sent += send;
		}
	} while (sent < dlen && !rc);
1452

1453
	return sent ? sent : rc;
P
Per Liden 已提交
1454 1455
}

1456
/**
1457
 * tipc_send_packet - send a connection-oriented message
P
Per Liden 已提交
1458
 * @sock: socket structure
1459 1460
 * @m: message to send
 * @dsz: length of data to be transmitted
1461
 *
1462
 * Used for SOCK_SEQPACKET messages.
1463
 *
1464
 * Returns the number of bytes sent on success, or errno otherwise
P
Per Liden 已提交
1465
 */
1466
static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
P
Per Liden 已提交
1467
{
1468 1469
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;
P
Per Liden 已提交
1470

1471
	return tipc_sendstream(sock, m, dsz);
P
Per Liden 已提交
1472 1473
}

1474
/* tipc_sk_finish_conn - complete the setup of a connection
P
Per Liden 已提交
1475
 */
1476
static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1477
				u32 peer_node)
P
Per Liden 已提交
1478
{
1479 1480
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
1481
	struct tipc_msg *msg = &tsk->phdr;
P
Per Liden 已提交
1482

1483 1484 1485 1486 1487
	msg_set_destnode(msg, peer_node);
	msg_set_destport(msg, peer_port);
	msg_set_type(msg, TIPC_CONN_MSG);
	msg_set_lookup_scope(msg, 0);
	msg_set_hdr_sz(msg, SHORT_H_SIZE);
1488

1489
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
1490
	tipc_set_sk_state(sk, TIPC_ESTABLISHED);
1491 1492
	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
1493
	tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
1494 1495 1496 1497 1498 1499
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
		return;

	/* Fall back to message based flow control */
	tsk->rcv_win = FLOWCTL_MSG_WIN;
	tsk->snd_win = FLOWCTL_MSG_WIN;
P
Per Liden 已提交
1500 1501 1502
}

/**
1503
 * tipc_sk_set_orig_addr - capture sender's address for received message
P
Per Liden 已提交
1504
 * @m: descriptor for message info
1505
 * @hdr: received message header
1506
 *
P
Per Liden 已提交
1507 1508
 * Note: Address is not captured if not requested by receiver.
 */
1509
static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
P
Per Liden 已提交
1510
{
1511 1512 1513 1514 1515 1516 1517 1518
	DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name);
	struct tipc_msg *hdr = buf_msg(skb);

	if (!srcaddr)
		return;

	srcaddr->sock.family = AF_TIPC;
	srcaddr->sock.addrtype = TIPC_ADDR_ID;
1519
	srcaddr->sock.scope = 0;
1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530
	srcaddr->sock.addr.id.ref = msg_origport(hdr);
	srcaddr->sock.addr.id.node = msg_orignode(hdr);
	srcaddr->sock.addr.name.domain = 0;
	m->msg_namelen = sizeof(struct sockaddr_tipc);

	if (!msg_in_group(hdr))
		return;

	/* Group message users may also want to know sending member's id */
	srcaddr->member.family = AF_TIPC;
	srcaddr->member.addrtype = TIPC_ADDR_NAME;
1531
	srcaddr->member.scope = 0;
1532 1533 1534 1535
	srcaddr->member.addr.name.name.type = msg_nametype(hdr);
	srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
	srcaddr->member.addr.name.domain = 0;
	m->msg_namelen = sizeof(*srcaddr);
P
Per Liden 已提交
1536 1537 1538
}

/**
1539
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
P
Per Liden 已提交
1540 1541
 * @m: descriptor for message info
 * @msg: received message header
1542
 * @tsk: TIPC port associated with message
1543
 *
P
Per Liden 已提交
1544
 * Note: Ancillary data is not captured if not requested by receiver.
1545
 *
P
Per Liden 已提交
1546 1547
 * Returns 0 if successful, otherwise errno
 */
1548 1549
static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
				 struct tipc_sock *tsk)
P
Per Liden 已提交
1550 1551 1552 1553
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
1554
	int has_name;
P
Per Liden 已提交
1555 1556 1557 1558 1559 1560 1561 1562 1563 1564
	int res;

	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
1565 1566
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
P
Per Liden 已提交
1567
			return res;
1568 1569 1570 1571 1572 1573
		if (anc_data[1]) {
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
P
Per Liden 已提交
1574 1575 1576 1577 1578 1579
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
1580
		has_name = 1;
P
Per Liden 已提交
1581 1582 1583 1584 1585
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
1586
		has_name = 1;
P
Per Liden 已提交
1587 1588 1589 1590 1591
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
1592 1593 1594 1595
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
P
Per Liden 已提交
1596 1597
		break;
	default:
1598
		has_name = 0;
P
Per Liden 已提交
1599
	}
1600 1601 1602 1603 1604
	if (has_name) {
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}
P
Per Liden 已提交
1605 1606 1607 1608

	return 0;
}

1609
static void tipc_sk_send_ack(struct tipc_sock *tsk)
1610
{
1611 1612
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
1613
	struct sk_buff *skb = NULL;
1614
	struct tipc_msg *msg;
1615 1616
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);
1617

1618
	if (!tipc_sk_connected(sk))
1619
		return;
1620 1621 1622
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
1623
	if (!skb)
1624
		return;
1625
	msg = buf_msg(skb);
1626 1627 1628 1629 1630 1631 1632 1633
	msg_set_conn_ack(msg, tsk->rcv_unacked);
	tsk->rcv_unacked = 0;

	/* Adjust to and advertize the correct window limit */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
		tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
		msg_set_adv_win(msg, tsk->rcv_win);
	}
1634
	tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
1635 1636
}

1637
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
Y
Ying Xue 已提交
1638 1639 1640
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
1641
	long timeo = *timeop;
1642 1643 1644 1645
	int err = sock_error(sk);

	if (err)
		return err;
Y
Ying Xue 已提交
1646 1647 1648

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1649
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
1650
			if (sk->sk_shutdown & RCV_SHUTDOWN) {
Y
Ying Xue 已提交
1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663
				err = -ENOTCONN;
				break;
			}
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
1664 1665 1666
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
1667 1668 1669 1670

		err = sock_error(sk);
		if (err)
			break;
Y
Ying Xue 已提交
1671 1672
	}
	finish_wait(sk_sleep(sk), &wait);
1673
	*timeop = timeo;
Y
Ying Xue 已提交
1674 1675 1676
	return err;
}

1677
/**
1678
 * tipc_recvmsg - receive packet-oriented message
P
Per Liden 已提交
1679
 * @m: descriptor for message info
1680
 * @buflen: length of user buffer area
P
Per Liden 已提交
1681
 * @flags: receive flags
1682
 *
P
Per Liden 已提交
1683 1684 1685 1686 1687
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
1688 1689
static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
			size_t buflen,	int flags)
P
Per Liden 已提交
1690
{
1691
	struct sock *sk = sock->sk;
1692
	bool connected = !tipc_sk_type_connectionless(sk);
1693
	struct tipc_sock *tsk = tipc_sk(sk);
1694
	int rc, err, hlen, dlen, copy;
1695
	struct sk_buff_head xmitq;
1696 1697 1698
	struct tipc_msg *hdr;
	struct sk_buff *skb;
	bool grp_evt;
1699
	long timeout;
P
Per Liden 已提交
1700

1701
	/* Catch invalid receive requests */
1702
	if (unlikely(!buflen))
P
Per Liden 已提交
1703 1704
		return -EINVAL;

1705
	lock_sock(sk);
1706 1707
	if (unlikely(connected && sk->sk_state == TIPC_OPEN)) {
		rc = -ENOTCONN;
P
Per Liden 已提交
1708 1709
		goto exit;
	}
1710
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
P
Per Liden 已提交
1711

1712
	/* Step rcv queue to first msg with data or error; wait if necessary */
1713 1714 1715 1716 1717 1718 1719 1720 1721
	do {
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			goto exit;
		skb = skb_peek(&sk->sk_receive_queue);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);
1722
		grp_evt = msg_is_grp_evt(hdr);
1723 1724
		if (likely(dlen || err))
			break;
1725
		tsk_advance_rx_queue(sk);
1726
	} while (1);
P
Per Liden 已提交
1727

1728
	/* Collect msg meta data, including error code and rejected data */
1729
	tipc_sk_set_orig_addr(m, skb);
1730 1731
	rc = tipc_sk_anc_data_recv(m, hdr, tsk);
	if (unlikely(rc))
P
Per Liden 已提交
1732 1733
		goto exit;

1734 1735 1736 1737
	/* Capture data if non-error msg, otherwise just set return value */
	if (likely(!err)) {
		copy = min_t(int, dlen, buflen);
		if (unlikely(copy != dlen))
P
Per Liden 已提交
1738
			m->msg_flags |= MSG_TRUNC;
1739
		rc = skb_copy_datagram_msg(skb, hlen, m, copy);
P
Per Liden 已提交
1740
	} else {
1741 1742 1743 1744
		copy = 0;
		rc = 0;
		if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
			rc = -ECONNRESET;
P
Per Liden 已提交
1745
	}
1746 1747
	if (unlikely(rc))
		goto exit;
P
Per Liden 已提交
1748

1749 1750 1751 1752 1753 1754 1755 1756
	/* Mark message as group event if applicable */
	if (unlikely(grp_evt)) {
		if (msg_grp_evt(hdr) == TIPC_WITHDRAWN)
			m->msg_flags |= MSG_EOR;
		m->msg_flags |= MSG_OOB;
		copy = 0;
	}

1757
	/* Caption of data or error code/rejected data was successful */
1758 1759 1760
	if (unlikely(flags & MSG_PEEK))
		goto exit;

1761 1762 1763 1764 1765 1766 1767 1768 1769
	/* Send group flow control advertisement when applicable */
	if (tsk->group && msg_in_group(hdr) && !grp_evt) {
		skb_queue_head_init(&xmitq);
		tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
					  msg_orignode(hdr), msg_origport(hdr),
					  &xmitq);
		tipc_node_distr_xmit(sock_net(sk), &xmitq);
	}

1770
	tsk_advance_rx_queue(sk);
1771

1772 1773 1774
	if (likely(!connected))
		goto exit;

1775
	/* Send connection flow control advertisement when applicable */
1776 1777 1778
	tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
	if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
		tipc_sk_send_ack(tsk);
P
Per Liden 已提交
1779
exit:
1780
	release_sock(sk);
1781
	return rc ? rc : copy;
P
Per Liden 已提交
1782 1783
}

1784
/**
1785
 * tipc_recvstream - receive stream-oriented data
P
Per Liden 已提交
1786
 * @m: descriptor for message info
1787
 * @buflen: total size of user buffer area
P
Per Liden 已提交
1788
 * @flags: receive flags
1789 1790
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
P
Per Liden 已提交
1791 1792 1793 1794
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
1795 1796
static int tipc_recvstream(struct socket *sock, struct msghdr *m,
			   size_t buflen, int flags)
P
Per Liden 已提交
1797
{
1798
	struct sock *sk = sock->sk;
1799
	struct tipc_sock *tsk = tipc_sk(sk);
1800 1801 1802 1803 1804 1805 1806
	struct sk_buff *skb;
	struct tipc_msg *hdr;
	struct tipc_skb_cb *skb_cb;
	bool peek = flags & MSG_PEEK;
	int offset, required, copy, copied = 0;
	int hlen, dlen, err, rc;
	long timeout;
P
Per Liden 已提交
1807

1808
	/* Catch invalid receive attempts */
1809
	if (unlikely(!buflen))
P
Per Liden 已提交
1810 1811
		return -EINVAL;

1812
	lock_sock(sk);
P
Per Liden 已提交
1813

1814
	if (unlikely(sk->sk_state == TIPC_OPEN)) {
1815
		rc = -ENOTCONN;
Y
Ying Xue 已提交
1816
		goto exit;
P
Per Liden 已提交
1817
	}
1818 1819
	required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen);
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
P
Per Liden 已提交
1820

1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831
	do {
		/* Look at first msg in receive queue; wait if necessary */
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			break;
		skb = skb_peek(&sk->sk_receive_queue);
		skb_cb = TIPC_SKB_CB(skb);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);
1832

1833 1834 1835 1836 1837
		/* Discard any empty non-errored (SYN-) message */
		if (unlikely(!dlen && !err)) {
			tsk_advance_rx_queue(sk);
			continue;
		}
1838

1839 1840
		/* Collect msg meta data, incl. error code and rejected data */
		if (!copied) {
1841
			tipc_sk_set_orig_addr(m, skb);
1842 1843 1844 1845
			rc = tipc_sk_anc_data_recv(m, hdr, tsk);
			if (rc)
				break;
		}
P
Per Liden 已提交
1846

1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866
		/* Copy data if msg ok, otherwise return error/partial data */
		if (likely(!err)) {
			offset = skb_cb->bytes_read;
			copy = min_t(int, dlen - offset, buflen - copied);
			rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
			if (unlikely(rc))
				break;
			copied += copy;
			offset += copy;
			if (unlikely(offset < dlen)) {
				if (!peek)
					skb_cb->bytes_read = offset;
				break;
			}
		} else {
			rc = 0;
			if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control)
				rc = -ECONNRESET;
			if (copied || rc)
				break;
P
Per Liden 已提交
1867 1868
		}

1869 1870
		if (unlikely(peek))
			break;
P
Per Liden 已提交
1871

1872
		tsk_advance_rx_queue(sk);
1873

1874 1875 1876 1877
		/* Send connection flow control advertisement when applicable */
		tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
		if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE))
			tipc_sk_send_ack(tsk);
P
Per Liden 已提交
1878

1879 1880 1881
		/* Exit if all requested data or FIN/error received */
		if (copied == buflen || err)
			break;
P
Per Liden 已提交
1882

1883
	} while (!skb_queue_empty(&sk->sk_receive_queue) || copied < required);
P
Per Liden 已提交
1884
exit:
1885
	release_sock(sk);
1886
	return copied ? copied : rc;
P
Per Liden 已提交
1887 1888
}

1889 1890 1891 1892 1893 1894 1895 1896 1897 1898
/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
H
Herbert Xu 已提交
1899
	if (skwq_has_sleeper(wq))
1900 1901
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
						EPOLLWRNORM | EPOLLWRBAND);
1902 1903 1904 1905 1906 1907 1908 1909
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 * @len: the length of messages
 */
1910
static void tipc_data_ready(struct sock *sk)
1911 1912 1913 1914 1915
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
H
Herbert Xu 已提交
1916
	if (skwq_has_sleeper(wq))
1917 1918
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN |
						EPOLLRDNORM | EPOLLRDBAND);
1919 1920 1921
	rcu_read_unlock();
}

1922 1923 1924 1925 1926
/* tipc_sock_destruct - free any messages still queued on the socket when
 * the last reference to it is dropped
 */
static void tipc_sock_destruct(struct sock *sk)
{
	__skb_queue_purge(&sk->sk_receive_queue);
}

J
Jon Maloy 已提交
1927 1928 1929 1930 1931 1932 1933
/* tipc_sk_proto_rcv - dispatch an incoming TIPC-internal (non-data) message
 * @sk: destination socket
 * @inputq: queue whose first buffer is the protocol message; group/event
 *          handlers may append follow-up messages to it
 * @xmitq: queue for messages to be sent back to the network
 *
 * Called with socket lock held; the dequeued buffer is consumed.
 */
static void tipc_sk_proto_rcv(struct sock *sk,
			      struct sk_buff_head *inputq,
			      struct sk_buff_head *xmitq)
{
	struct sk_buff *skb = __skb_dequeue(inputq);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);
	struct tipc_group *grp = tsk->group;
	bool wakeup = false;

	switch (msg_user(hdr)) {
	case CONN_MANAGER:
		/* skb ownership passes to the connection protocol handler */
		tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
		return;
	case SOCK_WAKEUP:
		/* A congested link was released; forget it and wake sender */
		tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
		tsk->cong_link_cnt--;
		wakeup = true;
		break;
	case GROUP_PROTOCOL:
		tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
		break;
	case TOP_SRV:
		tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
				      hdr, inputq, xmitq);
		break;
	default:
		break;
	}

	/* Wake up any sender blocked by congestion/group flow control */
	if (wakeup)
		sk->sk_write_space(sk);

	kfree_skb(skb);
}

1963
/**
 * tipc_sk_filter_connect - Handle incoming message for a connection-based socket
 * @tsk: TIPC socket
 * @skb: pointer to message buffer. Set to NULL if buffer is consumed
 *
 * Returns true if everything ok, false otherwise
 */
static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *hdr = buf_msg(skb);
	u32 pport = msg_origport(hdr);
	u32 pnode = msg_orignode(hdr);

	/* Multicast messages are never valid on a connection */
	if (unlikely(msg_mcast(hdr)))
		return false;

	switch (sk->sk_state) {
	case TIPC_CONNECTING:
		/* Accept only ACK or NACK message */
		if (unlikely(!msg_connected(hdr))) {
			/* An abort must come from the attempted peer */
			if (pport != tsk_peer_port(tsk) ||
			    pnode != tsk_peer_node(tsk))
				return false;

			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = ECONNREFUSED;
			sk->sk_state_change(sk);
			return true;
		}

		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = ECONNREFUSED;
			sk->sk_state_change(sk);
			return true;
		}

		if (unlikely(!msg_isdata(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = EINVAL;
			sk->sk_state_change(sk);
			return true;
		}

		/* Valid ACK: finalize connection state towards the peer */
		tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
		msg_set_importance(&tsk->phdr, msg_importance(hdr));

		/* If 'ACK+' message, add to socket receive queue */
		if (msg_data_sz(hdr))
			return true;

		/* If empty 'ACK-' message, wake up sleeping connect() */
		sk->sk_data_ready(sk);

		/* 'ACK-' message is neither accepted nor rejected: */
		msg_set_dest_droppable(hdr, 1);
		return false;

	case TIPC_OPEN:
	case TIPC_DISCONNECTING:
		break;
	case TIPC_LISTEN:
		/* Accept only SYN message */
		if (!msg_connected(hdr) && !(msg_errcode(hdr)))
			return true;
		break;
	case TIPC_ESTABLISHED:
		/* Accept only connection-based messages sent by peer */
		if (unlikely(!tsk_peer_msg(tsk, hdr)))
			return false;

		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			/* Let timer expire on its own */
			tipc_node_remove_conn(net, tsk_peer_node(tsk),
					      tsk->portid);
			sk->sk_state_change(sk);
		}
		return true;
	default:
		pr_err("Unknown sk_state %u\n", sk->sk_state);
	}

	return false;
}

2051 2052 2053
/**
 * rcvbuf_limit - get proper overload limit of socket receive queue
 * @sk: socket
 * @skb: message
 *
 * For connection oriented messages, irrespective of importance,
 * default queue limit is 2 MB.
 *
 * For connectionless messages, queue limits are based on message
 * importance as follows:
 *
 * TIPC_LOW_IMPORTANCE       (2 MB)
 * TIPC_MEDIUM_IMPORTANCE    (4 MB)
 * TIPC_HIGH_IMPORTANCE      (8 MB)
 * TIPC_CRITICAL_IMPORTANCE  (16 MB)
 *
 * Returns overload limit according to corresponding message importance
 */
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
{
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);

	/* Group messages are bounded by the plain socket receive buffer */
	if (unlikely(msg_in_group(hdr)))
		return sk->sk_rcvbuf;

	/* Connectionless: scale base limit by message importance */
	if (unlikely(!msg_connected(hdr)))
		return sk->sk_rcvbuf << msg_importance(hdr);

	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return sk->sk_rcvbuf;

	/* Legacy peer lacking block flow control: message-count based limit */
	return FLOWCTL_MSG_LIM;
}

2086
/**
 * tipc_sk_filter_rcv - validate incoming message
 * @sk: socket
 * @skb: pointer to message.
 * @xmitq: queue for protocol replies and rejected messages
 *
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
 * Called with socket lock already taken
 *
 */
static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
			       struct sk_buff_head *xmitq)
{
	bool sk_conn = !tipc_sk_type_connectionless(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = buf_msg(skb);
	struct net *net = sock_net(sk);
	struct sk_buff_head inputq;
	int limit, err = TIPC_OK;

	TIPC_SKB_CB(skb)->bytes_read = 0;
	__skb_queue_head_init(&inputq);
	__skb_queue_tail(&inputq, skb);

	/* Protocol messages are consumed here; handlers may refill inputq */
	if (unlikely(!msg_isdata(hdr)))
		tipc_sk_proto_rcv(sk, &inputq, xmitq);

	if (unlikely(grp))
		tipc_group_filter_msg(grp, &inputq, xmitq);

	/* Validate and add to receive buffer if there is space */
	while ((skb = __skb_dequeue(&inputq))) {
		hdr = buf_msg(skb);
		limit = rcvbuf_limit(sk, skb);
		if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
		    (!sk_conn && msg_connected(hdr)) ||
		    (!grp && msg_in_group(hdr)))
			err = TIPC_ERR_NO_PORT;
		else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
			atomic_inc(&sk->sk_drops);
			err = TIPC_ERR_OVERLOAD;
		}

		if (unlikely(err)) {
			/* Bounce the message back to the sender, if possible */
			tipc_skb_reject(net, err, skb, xmitq);
			err = TIPC_OK;
			continue;
		}
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		skb_set_owner_r(skb, sk);
		sk->sk_data_ready(sk);
	}
}
P
Per Liden 已提交
2141

2142
/**
 * tipc_sk_backlog_rcv - handle incoming message from backlog queue
 * @sk: socket
 * @skb: message
 *
 * Caller must hold socket lock
 */
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	unsigned int before = sk_rmem_alloc_get(sk);
	struct sk_buff_head xmitq;
	unsigned int added;

	__skb_queue_head_init(&xmitq);

	tipc_sk_filter_rcv(sk, skb, &xmitq);
	/* Track rmem growth as duplicate-counted bytes (see tipc_sk_enqueue) */
	added = sk_rmem_alloc_get(sk) - before;
	atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt);

	/* Send pending response/rejected messages, if any */
	tipc_node_distr_xmit(sock_net(sk), &xmitq);
	return 0;
}

2166
/**
 * tipc_sk_enqueue - extract all buffers with destination 'dport' from
 *                   inputq and try adding them to socket or backlog queue
 * @inputq: list of incoming buffers with potentially different destinations
 * @sk: socket where the buffers should be enqueued
 * @dport: port number for the socket
 * @xmitq: queue for replies and rejected messages
 *
 * Caller must hold socket lock
 */
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
			    u32 dport, struct sk_buff_head *xmitq)
{
	unsigned long time_limit = jiffies + 2;
	struct sk_buff *skb;
	unsigned int lim;
	atomic_t *dcnt;
	u32 onode;

	while (skb_queue_len(inputq)) {
		/* Bound the time the (bh) socket lock is held */
		if (unlikely(time_after_eq(jiffies, time_limit)))
			return;

		skb = tipc_skb_dequeue(inputq, dport);
		if (unlikely(!skb))
			return;

		/* Add message directly to receive queue if possible */
		if (!sock_owned_by_user(sk)) {
			tipc_sk_filter_rcv(sk, skb, xmitq);
			continue;
		}

		/* Try backlog, compensating for double-counted bytes */
		dcnt = &tipc_sk(sk)->dupl_rcvcnt;
		if (!sk->sk_backlog.len)
			atomic_set(dcnt, 0);
		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
		if (likely(!sk_add_backlog(sk, skb, lim)))
			continue;

		/* Overload => reject message back to sender */
		onode = tipc_own_addr(sock_net(sk));
		atomic_inc(&sk->sk_drops);
		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
			__skb_queue_tail(xmitq, skb);
		break;
	}
}

2215
/**
 * tipc_sk_rcv - handle a chain of incoming buffers
 * @inputq: buffer list containing the buffers
 * Consumes all buffers in list until inputq is empty
 * Note: may be called in multiple threads referring to the same queue
 */
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
{
	struct sk_buff_head xmitq;
	u32 dnode, dport = 0;
	int err;
	struct tipc_sock *tsk;
	struct sock *sk;
	struct sk_buff *skb;

	__skb_queue_head_init(&xmitq);
	while (skb_queue_len(inputq)) {
		dport = tipc_skb_peek_port(inputq, dport);
		tsk = tipc_sk_lookup(net, dport);

		if (likely(tsk)) {
			sk = &tsk->sk;
			/* If the lock is contended, another thread owning it
			 * will drain the queue entries for this port
			 */
			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
				spin_unlock_bh(&sk->sk_lock.slock);
			}
			/* Send pending response/rejected messages, if any */
			tipc_node_distr_xmit(sock_net(sk), &xmitq);
			sock_put(sk);
			continue;
		}
		/* No destination socket => dequeue skb if still there */
		skb = tipc_skb_dequeue(inputq, dport);
		if (!skb)
			return;

		/* Try secondary lookup if unresolved named message */
		err = TIPC_ERR_NO_PORT;
		if (tipc_msg_lookup_dest(net, skb, &err))
			goto xmit;

		/* Prepare for message rejection */
		if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
			continue;
xmit:
		dnode = msg_destnode(buf_msg(skb));
		tipc_node_xmit_skb(net, skb, dnode, dport);
	}
}

Y
Ying Xue 已提交
2265 2266
/* tipc_wait_for_connect - sleep until the socket leaves TIPC_CONNECTING,
 * a socket error is raised, the timeout expires, or a signal arrives.
 *
 * Returns 0 when connected, negative errno otherwise. Called with socket
 * lock held; sk_wait_event() drops and retakes it while sleeping.
 */
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = sock->sk;
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (!*timeo_p)
			return -ETIMEDOUT;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		add_wait_queue(sk_sleep(sk), &wait);
		done = sk_wait_event(sk, timeo_p,
				     sk->sk_state != TIPC_CONNECTING, &wait);
		remove_wait_queue(sk_sleep(sk), &wait);
	} while (!done);
	return 0;
}

P
Per Liden 已提交
2288
/**
 * tipc_connect - establish a connection to another TIPC port
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
 * @flags: file-related flags associated with socket
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
			int destlen, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
	long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
	int previous;
	int res = 0;

	if (destlen != sizeof(struct sockaddr_tipc))
		return -EINVAL;

	lock_sock(sk);

	/* Group members may not connect */
	if (tsk->group) {
		res = -EINVAL;
		goto exit;
	}

	/* AF_UNSPEC "disconnects" a connectionless socket */
	if (dst->family == AF_UNSPEC) {
		memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
		if (!tipc_sk_type_connectionless(sk))
			res = -EINVAL;
		goto exit;
	} else if (dst->family != AF_TIPC) {
		res = -EINVAL;
	}
	if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
		res = -EINVAL;
	if (res)
		goto exit;

	/* DGRAM/RDM connect(), just save the destaddr */
	if (tipc_sk_type_connectionless(sk)) {
		memcpy(&tsk->peer, dest, destlen);
		goto exit;
	}

	previous = sk->sk_state;

	switch (sk->sk_state) {
	case TIPC_OPEN:
		/* Send a 'SYN-' to destination */
		m.msg_name = dest;
		m.msg_namelen = destlen;

		/* If connect is in non-blocking case, set MSG_DONTWAIT to
		 * indicate send_msg() is never blocked.
		 */
		if (!timeout)
			m.msg_flags = MSG_DONTWAIT;

		res = __tipc_sendmsg(sock, &m, 0);
		if ((res < 0) && (res != -EWOULDBLOCK))
			goto exit;

		/* Just entered TIPC_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		res = -EINPROGRESS;
		/* fall thru' */
	case TIPC_CONNECTING:
		if (!timeout) {
			if (previous == TIPC_CONNECTING)
				res = -EALREADY;
			goto exit;
		}
		timeout = msecs_to_jiffies(timeout);
		/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
		res = tipc_wait_for_connect(sock, &timeout);
		break;
	case TIPC_ESTABLISHED:
		res = -EISCONN;
		break;
	default:
		res = -EINVAL;
	}

exit:
	release_sock(sk);
	return res;
}

2383
/**
2384
 * tipc_listen - allow socket to listen for incoming connections
P
Per Liden 已提交
2385 2386
 * @sock: socket structure
 * @len: (unused)
2387
 *
P
Per Liden 已提交
2388 2389
 * Returns 0 on success, errno otherwise
 */
2390
static int tipc_listen(struct socket *sock, int len)
P
Per Liden 已提交
2391
{
2392 2393 2394 2395
	struct sock *sk = sock->sk;
	int res;

	lock_sock(sk);
2396
	res = tipc_set_sk_state(sk, TIPC_LISTEN);
2397
	release_sock(sk);
2398

2399
	return res;
P
Per Liden 已提交
2400 2401
}

Y
Ying Xue 已提交
2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415
/* tipc_wait_for_accept - sleep until a SYN arrives on the listening socket,
 * the timeout expires, or a signal is pending.
 *
 * Returns 0 when a connection request is queued, negative errno otherwise.
 * Called with socket lock held; it is dropped while sleeping.
 */
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int err;

	/* True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	*/
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

2435
/**
 * tipc_accept - wait for connection request
 * @sock: listening socket
 * @new_sock: new socket that is to be connected
 * @flags: file-related flags associated with socket
 * @kern: caused by kernel or by userspace?
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
		       bool kern)
{
	struct sock *new_sk, *sk = sock->sk;
	struct sk_buff *buf;
	struct tipc_sock *new_tsock;
	struct tipc_msg *msg;
	long timeo;
	int res;

	lock_sock(sk);

	if (sk->sk_state != TIPC_LISTEN) {
		res = -EINVAL;
		goto exit;
	}
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	res = tipc_wait_for_accept(sock, timeo);
	if (res)
		goto exit;

	/* The SYN at the queue head carries the peer's identity */
	buf = skb_peek(&sk->sk_receive_queue);

	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
	if (res)
		goto exit;
	security_sk_clone(sock->sk, new_sock->sk);

	new_sk = new_sock->sk;
	new_tsock = tipc_sk(new_sk);
	msg = buf_msg(buf);

	/* we lock on new_sk; but lockdep sees the lock on sk */
	lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);

	/*
	 * Reject any stray messages received by new socket
	 * before the socket lock was taken (very, very unlikely)
	 */
	tsk_rej_rx_queue(new_sk);

	/* Connect new socket to its peer */
	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));

	tsk_set_importance(new_tsock, msg_importance(msg));
	if (msg_named(msg)) {
		new_tsock->conn_type = msg_nametype(msg);
		new_tsock->conn_instance = msg_nameinst(msg);
	}

	/*
	 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
	 * Respond to 'SYN+' by queuing it on new socket.
	 */
	if (!msg_data_sz(msg)) {
		struct msghdr m = {NULL,};

		tsk_advance_rx_queue(sk);
		__tipc_sendstream(new_sock, &m, 0);
	} else {
		__skb_dequeue(&sk->sk_receive_queue);
		__skb_queue_head(&new_sk->sk_receive_queue, buf);
		skb_set_owner_r(buf, new_sk);
	}
	release_sock(new_sk);
exit:
	release_sock(sk);
	return res;
}

/**
 * tipc_shutdown - shutdown socket connection
 * @sock: socket structure
 * @how: direction to close (must be SHUT_RDWR)
 *
 * Terminates connection (if necessary), then purges socket's receive queue.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	int res;

	if (how != SHUT_RDWR)
		return -EINVAL;

	lock_sock(sk);

	__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
	sk->sk_shutdown = SEND_SHUTDOWN;

	if (sk->sk_state == TIPC_DISCONNECTING) {
		/* Discard any unreceived messages */
		__skb_queue_purge(&sk->sk_receive_queue);

		/* Wake up anyone sleeping in poll */
		sk->sk_state_change(sk);
		res = 0;
	} else {
		res = -ENOTCONN;
	}

	release_sock(sk);
	return res;
}

2550
/* tipc_sk_timeout - connection probe timer callback
 *
 * If the previous probe went unacknowledged, the connection is torn down;
 * otherwise a new CONN_PROBE is sent and the timer re-armed. The probe is
 * transmitted only after the (bh) socket lock has been released.
 */
static void tipc_sk_timeout(struct timer_list *t)
{
	struct sock *sk = from_timer(sk, t, sk_timer);
	struct tipc_sock *tsk = tipc_sk(sk);
	u32 peer_port = tsk_peer_port(tsk);
	u32 peer_node = tsk_peer_node(tsk);
	u32 own_node = tsk_own_node(tsk);
	u32 own_port = tsk->portid;
	struct net *net = sock_net(sk);
	struct sk_buff *skb = NULL;

	bh_lock_sock(sk);
	if (!tipc_sk_connected(sk))
		goto exit;

	/* Try again later if socket is busy */
	if (sock_owned_by_user(sk)) {
		sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
		goto exit;
	}

	/* Peer never answered the previous probe: declare it gone */
	if (tsk->probe_unacked) {
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
		tipc_node_remove_conn(net, peer_node, peer_port);
		sk->sk_state_change(sk);
		goto exit;
	}
	/* Send new probe */
	skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
			      peer_node, own_node, peer_port, own_port,
			      TIPC_OK);
	tsk->probe_unacked = true;
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
exit:
	bh_unlock_sock(sk);
	if (skb)
		tipc_node_xmit_skb(net, skb, peer_node, own_port);
	/* Drop the reference taken when the timer was armed */
	sock_put(sk);
}

2590
/* tipc_sk_publish - bind a name sequence to this socket in the name table
 * @tsk: socket publishing the name
 * @scope: requested scope; anything but node scope is forced to cluster
 * @seq: name sequence (type/lower/upper) to publish
 *
 * Returns 0 on success, errno otherwise. Not allowed on connected sockets.
 */
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq const *seq)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct publication *publ;
	u32 key;

	if (scope != TIPC_NODE_SCOPE)
		scope = TIPC_CLUSTER_SCOPE;

	if (tipc_sk_connected(sk))
		return -EINVAL;
	/* Guard against the per-socket key wrapping back to the port id */
	key = tsk->portid + tsk->pub_count + 1;
	if (key == tsk->portid)
		return -EADDRINUSE;

	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
				    scope, tsk->portid, key);
	if (unlikely(!publ))
		return -EINVAL;

	list_add(&publ->binding_sock, &tsk->publications);
	tsk->pub_count++;
	tsk->published = 1;
	return 0;
}

2618
/* tipc_sk_withdraw - unbind name sequence(s) from this socket
 * @tsk: socket to withdraw from
 * @scope: publication scope to match (normalized like tipc_sk_publish)
 * @seq: name sequence to withdraw, or NULL to withdraw all publications
 *
 * Returns 0 if at least one publication was withdrawn, -EINVAL otherwise.
 */
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq const *seq)
{
	struct net *net = sock_net(&tsk->sk);
	struct publication *publ;
	struct publication *safe;
	int rc = -EINVAL;

	if (scope != TIPC_NODE_SCOPE)
		scope = TIPC_CLUSTER_SCOPE;

	list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
		if (seq) {
			if (publ->scope != scope)
				continue;
			if (publ->type != seq->type)
				continue;
			if (publ->lower != seq->lower)
				continue;
			if (publ->upper != seq->upper)
				break;
			tipc_nametbl_withdraw(net, publ->type, publ->lower,
					      publ->upper, publ->key);
			rc = 0;
			break;
		}
		/* seq == NULL: withdraw every publication on the socket */
		tipc_nametbl_withdraw(net, publ->type, publ->lower,
				      publ->upper, publ->key);
		rc = 0;
	}
	if (list_empty(&tsk->publications))
		tsk->published = 0;
	return rc;
}

2653 2654 2655
/* tipc_sk_reinit: set non-zero address in all existing sockets
 *                 when we go from standalone to network mode.
 */
2656
void tipc_sk_reinit(struct net *net)
2657
{
2658
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2659
	struct rhashtable_iter iter;
2660
	struct tipc_sock *tsk;
2661 2662
	struct tipc_msg *msg;

2663 2664 2665
	rhashtable_walk_enter(&tn->sk_rht, &iter);

	do {
2666
		rhashtable_walk_start(&iter);
2667 2668

		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
2669 2670
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			msg = &tsk->phdr;
2671 2672
			msg_set_prevnode(msg, tipc_own_addr(net));
			msg_set_orignode(msg, tipc_own_addr(net));
2673 2674
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
2675

2676 2677
		rhashtable_walk_stop(&iter);
	} while (tsk == ERR_PTR(-EAGAIN));
2678 2679
}

2680
/* tipc_sk_lookup - map a port id to its socket, taking a reference on it.
 * Returns the socket with its refcount elevated, or NULL if not found.
 */
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_sock *found;

	rcu_read_lock();
	found = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params);
	if (found)
		sock_hold(&found->sk);
	rcu_read_unlock();

	return found;
}

2694
/* tipc_sk_insert - assign a free port id to the socket and insert it into
 * the per-netns hash table. The table holds one socket reference.
 *
 * Returns 0 on success, -1 if the whole port range is exhausted.
 */
static int tipc_sk_insert(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
	u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;

	while (remaining--) {
		portid++;
		/* Wrap around inside the valid port range */
		if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT))
			portid = TIPC_MIN_PORT;
		tsk->portid = portid;
		sock_hold(&tsk->sk);
		/* Insert fails (non-zero) only if the port id is taken */
		if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
						   tsk_rht_params))
			return 0;
		sock_put(&tsk->sk);
	}

	return -1;
}

2717
/* tipc_sk_remove - remove the socket from the port id hash table and drop
 * the reference the table held (taken in tipc_sk_insert()).
 */
static void tipc_sk_remove(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);

	if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
		/* The table's reference must never have been the last one */
		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
}

2728 2729 2730 2731 2732 2733 2734
/* Parameters for the per-netns port-id -> tipc_sock hash table */
static const struct rhashtable_params tsk_rht_params = {
	.nelem_hint = 192,
	.head_offset = offsetof(struct tipc_sock, node),
	.key_offset = offsetof(struct tipc_sock, portid),
	.key_len = sizeof(u32), /* portid */
	.max_size = 1048576,
	.min_size = 256,
	.automatic_shrinking = true,
};

2738
/* tipc_sk_rht_init - create the per-netns socket hash table */
int tipc_sk_rht_init(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
}

2745
/* tipc_sk_rht_destroy - tear down the per-netns socket hash table */
void tipc_sk_rht_destroy(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	/* Wait for socket readers to complete */
	synchronize_net();

	rhashtable_destroy(&tn->sk_rht);
}

J
Jon Maloy 已提交
2755 2756 2757 2758 2759 2760 2761 2762 2763 2764
/* tipc_sk_join - make this socket a member of a communication group
 * @tsk: socket joining
 * @mreq: group join request copied in from userspace
 *
 * Returns 0 on success, errno otherwise. A socket may belong to at most
 * one group, and reserved name types may not be used.
 */
static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
{
	struct net *net = sock_net(&tsk->sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq seq;
	int rc;

	if (mreq->type < TIPC_RESERVED_TYPES)
		return -EACCES;
	if (mreq->scope > TIPC_NODE_SCOPE)
		return -EINVAL;
	if (grp)
		return -EACCES;
	grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open);
	if (!grp)
		return -ENOMEM;
	tsk->group = grp;
	msg_set_lookup_scope(hdr, mreq->scope);
	msg_set_nametype(hdr, mreq->type);
	msg_set_dest_droppable(hdr, true);
	seq.type = mreq->type;
	seq.lower = mreq->instance;
	seq.upper = seq.lower;
	tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
	rc = tipc_sk_publish(tsk, mreq->scope, &seq);
	if (rc) {
		/* Roll back group creation if the name publish failed */
		tipc_group_delete(net, grp);
		tsk->group = NULL;
		return rc;
	}
	/* Eliminate any risk that a broadcast overtakes sent JOINs */
	tsk->mc_method.rcast = true;
	tsk->mc_method.mandatory = true;
	tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf);
	return rc;
}

/* tipc_sk_leave - leave the communication group, if any, and withdraw the
 * member name published at join time.
 *
 * Returns 0 on success, -EINVAL if the socket is not a group member.
 */
static int tipc_sk_leave(struct tipc_sock *tsk)
{
	struct net *net = sock_net(&tsk->sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_name_seq seq;
	int scope;

	if (!grp)
		return -EINVAL;
	tipc_group_self(grp, &seq, &scope);
	tipc_group_delete(net, grp);
	tsk->group = NULL;
	tipc_sk_withdraw(tsk, scope, &seq);
	return 0;
}

P
Per Liden 已提交
2809
/**
 * tipc_setsockopt - set socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: pointer to new option value
 * @ol: length of option value
 *
 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, unsigned int ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group_req mreq;
	u32 value = 0;
	int res = 0;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return 0;
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;

	/* Copy the option value from userspace before taking the lock */
	switch (opt) {
	case TIPC_IMPORTANCE:
	case TIPC_SRC_DROPPABLE:
	case TIPC_DEST_DROPPABLE:
	case TIPC_CONN_TIMEOUT:
		if (ol < sizeof(value))
			return -EINVAL;
		if (get_user(value, (u32 __user *)ov))
			return -EFAULT;
		break;
	case TIPC_GROUP_JOIN:
		if (ol < sizeof(mreq))
			return -EINVAL;
		if (copy_from_user(&mreq, ov, sizeof(mreq)))
			return -EFAULT;
		break;
	default:
		/* Remaining options take no value */
		if (ov || ol)
			return -EINVAL;
	}

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		res = tsk_set_importance(tsk, value);
		break;
	case TIPC_SRC_DROPPABLE:
		if (sock->type != SOCK_STREAM)
			tsk_set_unreliable(tsk, value);
		else
			res = -ENOPROTOOPT;
		break;
	case TIPC_DEST_DROPPABLE:
		tsk_set_unreturnable(tsk, value);
		break;
	case TIPC_CONN_TIMEOUT:
		tipc_sk(sk)->conn_timeout = value;
		break;
	case TIPC_MCAST_BROADCAST:
		tsk->mc_method.rcast = false;
		tsk->mc_method.mandatory = true;
		break;
	case TIPC_MCAST_REPLICAST:
		tsk->mc_method.rcast = true;
		tsk->mc_method.mandatory = true;
		break;
	case TIPC_GROUP_JOIN:
		res = tipc_sk_join(tsk, &mreq);
		break;
	case TIPC_GROUP_LEAVE:
		res = tipc_sk_leave(tsk);
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	return res;
}

/**
 * tipc_getsockopt - get socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: receptacle for option value
 * @ol: receptacle for length of option value
 *
 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, int __user *ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_name_seq seq;
	int len, scope;
	u32 value;
	int res;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return put_user(0, ol);
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
	res = get_user(len, ol);
	if (res)
		return res;

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		value = tsk_importance(tsk);
		break;
	case TIPC_SRC_DROPPABLE:
		value = tsk_unreliable(tsk);
		break;
	case TIPC_DEST_DROPPABLE:
		value = tsk_unreturnable(tsk);
		break;
	case TIPC_CONN_TIMEOUT:
		value = tsk->conn_timeout;
		/* no need to set "res", since already 0 at this point */
		break;
	case TIPC_NODE_RECVQ_DEPTH:
		value = 0; /* was tipc_queue_size, now obsolete */
		break;
	case TIPC_SOCK_RECVQ_DEPTH:
		value = skb_queue_len(&sk->sk_receive_queue);
		break;
	case TIPC_GROUP_JOIN:
		/* Report the joined group's type, or 0 if not a member */
		seq.type = 0;
		if (tsk->group)
			tipc_group_self(tsk->group, &seq, &scope);
		value = seq.type;
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	if (res)
		return res;	/* "get" failed */

	if (len < sizeof(value))
		return -EINVAL;

	if (copy_to_user(ov, &value, sizeof(value)))
		return -EFAULT;

	return put_user(sizeof(value), ol);
}

2975
static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
E
Erik Hugne 已提交
2976
{
2977 2978
	struct net *net = sock_net(sock->sk);
	struct tipc_sioc_nodeid_req nr = {0};
E
Erik Hugne 已提交
2979 2980 2981 2982 2983 2984 2985
	struct tipc_sioc_ln_req lnr;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case SIOCGETLINKNAME:
		if (copy_from_user(&lnr, argp, sizeof(lnr)))
			return -EFAULT;
2986
		if (!tipc_node_get_linkname(net,
2987
					    lnr.bearer_id & 0xffff, lnr.peer,
E
Erik Hugne 已提交
2988 2989 2990 2991 2992 2993
					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
			if (copy_to_user(argp, &lnr, sizeof(lnr)))
				return -EFAULT;
			return 0;
		}
		return -EADDRNOTAVAIL;
2994 2995 2996 2997 2998 2999 3000 3001
	case SIOCGETNODEID:
		if (copy_from_user(&nr, argp, sizeof(nr)))
			return -EFAULT;
		if (!tipc_node_get_id(net, nr.peer, nr.node_id))
			return -EADDRNOTAVAIL;
		if (copy_to_user(argp, &nr, sizeof(nr)))
			return -EFAULT;
		return 0;
E
Erik Hugne 已提交
3002 3003 3004 3005 3006
	default:
		return -ENOIOCTLCMD;
	}
}

3007 3008 3009 3010
static int tipc_socketpair(struct socket *sock1, struct socket *sock2)
{
	struct tipc_sock *tsk2 = tipc_sk(sock2->sk);
	struct tipc_sock *tsk1 = tipc_sk(sock1->sk);
E
Erik Hugne 已提交
3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025
	u32 onode = tipc_own_addr(sock_net(sock1->sk));

	tsk1->peer.family = AF_TIPC;
	tsk1->peer.addrtype = TIPC_ADDR_ID;
	tsk1->peer.scope = TIPC_NODE_SCOPE;
	tsk1->peer.addr.id.ref = tsk2->portid;
	tsk1->peer.addr.id.node = onode;
	tsk2->peer.family = AF_TIPC;
	tsk2->peer.addrtype = TIPC_ADDR_ID;
	tsk2->peer.scope = TIPC_NODE_SCOPE;
	tsk2->peer.addr.id.ref = tsk1->portid;
	tsk2->peer.addr.id.node = onode;

	tipc_sk_finish_conn(tsk1, tsk2->portid, onode);
	tipc_sk_finish_conn(tsk2, tsk1->portid, onode);
3026 3027 3028
	return 0;
}

3029 3030
/* Protocol switches for the various types of TIPC sockets */

/* Message-oriented sockets: connectionless, so accept() and listen()
 * are rejected via sock_no_accept()/sock_no_listen().
 */
static const struct proto_ops msg_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= tipc_socketpair,
	.accept		= sock_no_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendmsg,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

3052
/* Packet sockets: connection-oriented, message-preserving;
 * sends via tipc_send_packet() and supports accept()/listen().
 */
static const struct proto_ops packet_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= tipc_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_packet,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

3073
/* Stream sockets: like packet_ops but with the byte-stream
 * send/receive variants (tipc_sendstream()/tipc_recvstream()).
 */
static const struct proto_ops stream_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= tipc_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendstream,
	.recvmsg	= tipc_recvstream,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

3094
/* AF_TIPC address-family handler; tipc_sk_create() instantiates
 * new sockets of this family.
 */
static const struct net_proto_family tipc_family_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.create		= tipc_sk_create
};

/* Protocol descriptor registered with the core socket layer */
static struct proto tipc_proto = {
	.name		= "TIPC",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct tipc_sock),
	.sysctl_rmem	= sysctl_tipc_rmem
};

/**
3108
 * tipc_socket_init - initialize TIPC socket interface
3109
 *
P
Per Liden 已提交
3110 3111
 * Returns 0 on success, errno otherwise
 */
3112
int tipc_socket_init(void)
P
Per Liden 已提交
3113 3114 3115
{
	int res;

3116
	res = proto_register(&tipc_proto, 1);
P
Per Liden 已提交
3117
	if (res) {
3118
		pr_err("Failed to register TIPC protocol type\n");
P
Per Liden 已提交
3119 3120 3121 3122 3123
		goto out;
	}

	res = sock_register(&tipc_family_ops);
	if (res) {
3124
		pr_err("Failed to register TIPC socket type\n");
P
Per Liden 已提交
3125 3126 3127 3128 3129 3130 3131 3132
		proto_unregister(&tipc_proto);
		goto out;
	}
 out:
	return res;
}

/**
 * tipc_socket_stop - stop TIPC socket interface
 */
void tipc_socket_stop(void)
{
	/* Undo tipc_socket_init() in reverse order */
	sock_unregister(tipc_family_ops.family);
	proto_unregister(&tipc_proto);
}
3140 3141

/* Caller should hold socket lock for the passed tipc socket. */
3142
static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175
{
	u32 peer_node;
	u32 peer_port;
	struct nlattr *nest;

	peer_node = tsk_peer_node(tsk);
	peer_port = tsk_peer_port(tsk);

	nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);

	if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
		goto msg_full;
	if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
		goto msg_full;

	if (tsk->conn_type != 0) {
		if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
			goto msg_full;
	}
	nla_nest_end(skb, nest);

	return 0;

msg_full:
	nla_nest_cancel(skb, nest);

	return -EMSGSIZE;
}

3176 3177 3178 3179 3180 3181 3182
static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock
			  *tsk)
{
	struct net *net = sock_net(skb->sk);
	struct sock *sk = &tsk->sk;

	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) ||
3183
	    nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net)))
3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195
		return -EMSGSIZE;

	if (tipc_sk_connected(sk)) {
		if (__tipc_nl_add_sk_con(skb, tsk))
			return -EMSGSIZE;
	} else if (!list_empty(&tsk->publications)) {
		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
			return -EMSGSIZE;
	}
	return 0;
}

3196
/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tipc_sock *tsk)
{
	struct nlattr *attrs;
	void *hdr;

	/* Start a new NLM_F_MULTI record for this socket */
	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto genlmsg_cancel;

	if (__tipc_nl_add_sk_info(skb, tsk))
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

/* Unwind in reverse order of construction on any failure */
attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

3228 3229 3230 3231
/* tipc_nl_sk_walk - walk all sockets in the per-netns socket rhashtable
 * and invoke @skb_handler for each one.
 *
 * Supports resumable netlink dumps: cb->args[0] holds the hash bucket to
 * continue from, cb->args[1] the portid of the socket whose record did
 * not fit in the previous skb (0 = none).
 */
int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
		    int (*skb_handler)(struct sk_buff *skb,
				       struct netlink_callback *cb,
				       struct tipc_sock *tsk))
{
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = tipc_net(net);
	const struct bucket_table *tbl;
	u32 prev_portid = cb->args[1];
	u32 tbl_id = cb->args[0];
	struct rhash_head *pos;
	struct tipc_sock *tsk;
	int err;

	rcu_read_lock();
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
	for (; tbl_id < tbl->size; tbl_id++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) {
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			/* Skip entries until we reach the resume point */
			if (prev_portid && prev_portid != tsk->portid) {
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				continue;
			}

			err = skb_handler(skb, cb, tsk);
			if (err) {
				/* skb full: remember where to resume */
				prev_portid = tsk->portid;
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				goto out;
			}

			prev_portid = 0;
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
	}
out:
	rcu_read_unlock();
	/* Save walk position for the next dump invocation */
	cb->args[0] = tbl_id;
	cb->args[1] = prev_portid;

	return skb->len;
}
EXPORT_SYMBOL(tipc_nl_sk_walk);

3272 3273
/* tipc_sk_fill_sock_diag - fill a sock_diag attribute block for @tsk.
 *
 * Sockets whose state bit is not set in @sk_filter_state are skipped
 * (returns 0 without emitting anything). @tipc_diag_gen_cookie supplies
 * the per-socket cookie reported in TIPC_NLA_SOCK_COOKIE.
 * Returns 0 on success, -EMSGSIZE when the skb has no room left.
 */
int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
			   struct tipc_sock *tsk, u32 sk_filter_state,
			   u64 (*tipc_diag_gen_cookie)(struct sock *sk))
{
	struct sock *sk = &tsk->sk;
	struct nlattr *attrs;
	struct nlattr *stat;

	/* Filter response w.r.t. sk_state */
	if (!(sk_filter_state & (1 << sk->sk_state)))
		return 0;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto msg_cancel;

	if (__tipc_nl_add_sk_info(skb, tsk))
		goto attr_msg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_UID,
			from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk),
					 sock_i_uid(sk))) ||
	    nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE,
			      tipc_diag_gen_cookie(sk),
			      TIPC_NLA_SOCK_PAD))
		goto attr_msg_cancel;

	/* Nested queue/congestion statistics */
	stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT);
	if (!stat)
		goto attr_msg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ,
			skb_queue_len(&sk->sk_receive_queue)) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ,
			skb_queue_len(&sk->sk_write_queue)) ||
	    nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP,
			atomic_read(&sk->sk_drops)))
		goto stat_msg_cancel;

	if (tsk->cong_link_cnt &&
	    nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG))
		goto stat_msg_cancel;

	if (tsk_conn_cong(tsk) &&
	    nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG))
		goto stat_msg_cancel;

	nla_nest_end(skb, stat);

	if (tsk->group)
		if (tipc_group_fill_sock_diag(tsk->group, skb))
			goto stat_msg_cancel;

	nla_nest_end(skb, attrs);

	return 0;

stat_msg_cancel:
	nla_nest_cancel(skb, stat);
attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
msg_cancel:
	return -EMSGSIZE;
}
EXPORT_SYMBOL(tipc_sk_fill_sock_diag);
3340

3341 3342
/* Netlink dump callback: emit one TIPC_NL_SOCK_GET record per socket. */
int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk);
}

3346
/* Caller should hold socket lock for the passed tipc socket. */
3347 3348 3349
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 struct publication *publ)
3350 3351 3352 3353 3354
{
	void *hdr;
	struct nlattr *attrs;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3355
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET);
3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_PUBL);
	if (!attrs)
		goto genlmsg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
				  struct netlink_callback *cb,
				  struct tipc_sock *tsk, u32 *last_publ)
{
	int err;
	struct publication *p;

	if (*last_publ) {
		/* Resuming an interrupted dump: find the publication whose
		 * record overflowed the previous skb.
		 */
		list_for_each_entry(p, &tsk->publications, binding_sock) {
			if (p->key == *last_publ)
				break;
		}
		/* NOTE: if the loop did not break, p is the head sentinel
		 * cast to struct publication, so p->key is garbage and the
		 * inequality below fires — the entry was removed meanwhile.
		 */
		if (p->key != *last_publ) {
			/* We never set seq or call nl_dump_check_consistent()
			 * this means that setting prev_seq here will cause the
			 * consistence check to fail in the netlink callback
			 * handler. Resulting in the last NLMSG_DONE message
			 * having the NLM_F_DUMP_INTR flag set.
			 */
			cb->prev_seq = 1;
			*last_publ = 0;
			return -EPIPE;
		}
	} else {
		p = list_first_entry(&tsk->publications, struct publication,
				     binding_sock);
	}

	list_for_each_entry_from(p, &tsk->publications, binding_sock) {
		err = __tipc_nl_add_sk_publ(skb, cb, p);
		if (err) {
			/* skb full: remember resume point for next pass */
			*last_publ = p->key;
			return err;
		}
	}
	/* All publications dumped */
	*last_publ = 0;

	return 0;
}

/* tipc_nl_publ_dump - netlink dump callback listing the publications of
 * one socket.
 *
 * Resumable dump state in cb->args[]: [0] = target socket portid,
 * [1] = key of the last publication dumped, [2] = dump completed flag.
 */
int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	u32 tsk_portid = cb->args[0];
	u32 last_publ = cb->args[1];
	u32 done = cb->args[2];
	struct net *net = sock_net(skb->sk);
	struct tipc_sock *tsk;

	if (!tsk_portid) {
		/* First pass: extract the target socket ref from the request */
		struct nlattr **attrs;
		struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];

		err = tipc_nlmsg_parse(cb->nlh, &attrs);
		if (err)
			return err;

		if (!attrs[TIPC_NLA_SOCK])
			return -EINVAL;

		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
				       attrs[TIPC_NLA_SOCK],
				       tipc_nl_sock_policy, NULL);
		if (err)
			return err;

		if (!sock[TIPC_NLA_SOCK_REF])
			return -EINVAL;

		tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]);
	}

	if (done)
		return 0;

	/* tipc_sk_lookup() takes a reference, dropped by sock_put() below */
	tsk = tipc_sk_lookup(net, tsk_portid);
	if (!tsk)
		return -EINVAL;

	lock_sock(&tsk->sk);
	err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ);
	if (!err)
		done = 1;
	release_sock(&tsk->sk);
	sock_put(&tsk->sk);

	/* Save state for a possible continuation */
	cb->args[0] = tsk_portid;
	cb->args[1] = last_publ;
	cb->args[2] = done;

	return skb->len;
}