/*
 * net/tipc/socket.c: TIPC socket API
 *
 * Copyright (c) 2001-2007, 2012-2017, Ericsson AB
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/rhashtable.h>
#include <linux/sched/signal.h>

#include "core.h"
#include "name_table.h"
#include "node.h"
#include "link.h"
#include "name_distr.h"
#include "socket.h"
#include "bcast.h"
#include "netlink.h"
#include "group.h"

#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
#define CONN_PROBING_INTERVAL	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
#define TIPC_FWD_MSG		1
#define TIPC_MAX_PORT		0xffffffff
#define TIPC_MIN_PORT		1
#define TIPC_ACK_RATE		4       /* ACK at 1/4 of rcv window size */

enum {
	TIPC_LISTEN = TCP_LISTEN,
	TIPC_ESTABLISHED = TCP_ESTABLISHED,
	TIPC_OPEN = TCP_CLOSE,
	TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
	TIPC_CONNECTING = TCP_SYN_SENT,
};

struct sockaddr_pair {
	struct sockaddr_tipc sock;
	struct sockaddr_tipc member;
};
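
/* Editorial note (illustrative, not part of the original source): this pair is
 * what tipc_sk_set_orig_addr() below copies into the recvmsg() address buffer.
 * For group messages m->msg_namelen is set to sizeof(struct sockaddr_pair), so
 * a receiver that supplies room for two struct sockaddr_tipc entries gets both
 * the sending socket id (first entry) and the sending member's name (second
 * entry); for ordinary messages only the first entry is filled in and
 * msg_namelen is sizeof(struct sockaddr_tipc).
 */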

/**
 * struct tipc_sock - TIPC socket structure
 * @sk: socket - interacts with 'port' and with user via the socket API
 * @conn_type: TIPC type used when connection was established
 * @conn_instance: TIPC instance used when connection was established
 * @published: non-zero if port has one or more associated names
 * @max_pkt: maximum packet size "hint" used when building messages sent by port
 * @portid: unique port identity in TIPC socket hash table
 * @phdr: preformatted message header used when sending messages
 * @cong_links: list of congested links
 * @publications: list of publications for port
 * @blocking_link: address of the congested link we are currently sleeping on
 * @pub_count: total # of publications port has made during its lifetime
 * @probe_unacked: probe has not received ack yet
 * @conn_timeout: the time we can wait for an unresponded setup request
 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
 * @cong_link_cnt: number of congested links
 * @snt_unacked: # messages sent by socket, and not yet acked by peer
 * @rcv_unacked: # messages read by user, but not yet acked back to peer
 * @peer: 'connected' peer for dgram/rdm
 * @node: hash table node
 * @mc_method: cookie for use between socket and broadcast layer
 * @rcu: rcu struct for tipc_sock
 */
struct tipc_sock {
	struct sock sk;
	u32 conn_type;
	u32 conn_instance;
	int published;
	u32 max_pkt;
	u32 portid;
	struct tipc_msg phdr;
	struct list_head cong_links;
	struct list_head publications;
	u32 pub_count;
	uint conn_timeout;
	atomic_t dupl_rcvcnt;
	bool probe_unacked;
	u16 cong_link_cnt;
	u16 snt_unacked;
	u16 snd_win;
	u16 peer_caps;
	u16 rcv_unacked;
	u16 rcv_win;
	struct sockaddr_tipc peer;
	struct rhash_head node;
	struct tipc_mc_method mc_method;
	struct rcu_head rcu;
	struct tipc_group *group;
};

static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
		       bool kern);
static void tipc_sk_timeout(unsigned long data);
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq const *seq);
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq const *seq);
static int tipc_sk_leave(struct tipc_sock *tsk);
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);

static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
static struct proto tipc_proto;
static const struct rhashtable_params tsk_rht_params;

static u32 tsk_own_node(struct tipc_sock *tsk)
{
	return msg_prevnode(&tsk->phdr);
}

static u32 tsk_peer_node(struct tipc_sock *tsk)
{
	return msg_destnode(&tsk->phdr);
}

static u32 tsk_peer_port(struct tipc_sock *tsk)
{
	return msg_destport(&tsk->phdr);
}

static bool tsk_unreliable(struct tipc_sock *tsk)
{
	return msg_src_droppable(&tsk->phdr) != 0;
}

static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
{
	msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0);
}

static bool tsk_unreturnable(struct tipc_sock *tsk)
{
	return msg_dest_droppable(&tsk->phdr) != 0;
}

static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
{
	msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0);
}

static int tsk_importance(struct tipc_sock *tsk)
{
	return msg_importance(&tsk->phdr);
}

static int tsk_set_importance(struct tipc_sock *tsk, int imp)
{
	if (imp > TIPC_CRITICAL_IMPORTANCE)
		return -EINVAL;
	msg_set_importance(&tsk->phdr, (u32)imp);
	return 0;
}

static struct tipc_sock *tipc_sk(const struct sock *sk)
{
	return container_of(sk, struct tipc_sock, sk);
}

static bool tsk_conn_cong(struct tipc_sock *tsk)
{
	return tsk->snt_unacked > tsk->snd_win;
}

static u16 tsk_blocks(int len)
{
	return ((len / FLOWCTL_BLK_SZ) + 1);
}

/* tsk_adv_blocks(): translate a buffer size in bytes to number of
 * advertisable blocks, taking into account the ratio truesize(len)/len
 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
 */
static u16 tsk_adv_blocks(int len)
{
	return len / FLOWCTL_BLK_SZ / 4;
}

/* tsk_inc(): increment counter for sent or received data
 * - If block based flow control is not supported by peer we
 *   fall back to message based ditto, incrementing the counter
 */
static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
{
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return ((msglen / FLOWCTL_BLK_SZ) + 1);
	return 1;
}
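
/* Worked example (editorial, assuming FLOWCTL_BLK_SZ is 1024 as defined in
 * msg.h): a socket whose sk_rcvbuf is 64 KB advertises
 * tsk_adv_blocks(65536) = 65536 / 1024 / 4 = 16 blocks, while receiving a
 * 1000-byte message from a TIPC_BLOCK_FLOWCTL-capable peer consumes
 * tsk_inc(tsk, 1000) = 1000 / 1024 + 1 = 1 block.  Against a legacy peer
 * every message counts as exactly one unit, whatever its size.
 */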

/**
 * tsk_advance_rx_queue - discard first buffer in socket receive queue
 *
 * Caller must hold socket lock
 */
static void tsk_advance_rx_queue(struct sock *sk)
{
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
}

/* tipc_sk_respond() : send response message back to sender
 */
static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
{
	u32 selector;
	u32 dnode;
	u32 onode = tipc_own_addr(sock_net(sk));

	if (!tipc_msg_reverse(onode, &skb, err))
		return;

	dnode = msg_destnode(buf_msg(skb));
	selector = msg_origport(buf_msg(skb));
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
}

/**
 * tsk_rej_rx_queue - reject all buffers in socket receive queue
 *
 * Caller must hold socket lock
 */
static void tsk_rej_rx_queue(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
		tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
}

static bool tipc_sk_connected(struct sock *sk)
{
	return sk->sk_state == TIPC_ESTABLISHED;
}

/* tipc_sk_type_connectionless - check if the socket is datagram socket
 * @sk: socket
 *
 * Returns true if connection less, false otherwise
 */
static bool tipc_sk_type_connectionless(struct sock *sk)
{
	return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM;
}

/* tsk_peer_msg - verify if message was sent by connected port's peer
 *
 * Handles cases where the node's network address has changed from
 * the default of <0.0.0> to its configured setting.
 */
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
	u32 peer_port = tsk_peer_port(tsk);
	u32 orig_node;
	u32 peer_node;

	if (unlikely(!tipc_sk_connected(sk)))
		return false;

	if (unlikely(msg_origport(msg) != peer_port))
		return false;

	orig_node = msg_orignode(msg);
	peer_node = tsk_peer_node(tsk);

	if (likely(orig_node == peer_node))
		return true;

	if (!orig_node && (peer_node == tn->own_addr))
		return true;

	if (!peer_node && (orig_node == tn->own_addr))
		return true;

	return false;
}

/* tipc_set_sk_state - set the sk_state of the socket
 * @sk: socket
 *
 * Caller must hold socket lock
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_set_sk_state(struct sock *sk, int state)
{
	int oldsk_state = sk->sk_state;
	int res = -EINVAL;

	switch (state) {
	case TIPC_OPEN:
		res = 0;
		break;
	case TIPC_LISTEN:
	case TIPC_CONNECTING:
		if (oldsk_state == TIPC_OPEN)
			res = 0;
		break;
	case TIPC_ESTABLISHED:
		if (oldsk_state == TIPC_CONNECTING ||
		    oldsk_state == TIPC_OPEN)
			res = 0;
		break;
	case TIPC_DISCONNECTING:
		if (oldsk_state == TIPC_CONNECTING ||
		    oldsk_state == TIPC_ESTABLISHED)
			res = 0;
		break;
	}

	if (!res)
		sk->sk_state = state;

	return res;
}
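
/* Editorial summary of the transitions accepted above: TIPC_OPEN is always
 * reachable; TIPC_LISTEN and TIPC_CONNECTING only from TIPC_OPEN;
 * TIPC_ESTABLISHED from TIPC_CONNECTING or TIPC_OPEN; and TIPC_DISCONNECTING
 * from TIPC_CONNECTING or TIPC_ESTABLISHED.  Anything else returns -EINVAL.
 */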

static int tipc_sk_sock_err(struct socket *sock, long *timeout)
{
	struct sock *sk = sock->sk;
	int err = sock_error(sk);
	int typ = sock->type;

	if (err)
		return err;
	if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) {
		if (sk->sk_state == TIPC_DISCONNECTING)
			return -EPIPE;
		else if (!tipc_sk_connected(sk))
			return -ENOTCONN;
	}
	if (!*timeout)
		return -EAGAIN;
	if (signal_pending(current))
		return sock_intr_errno(*timeout);

	return 0;
}

#define tipc_wait_for_cond(sock_, timeo_, condition_)			       \
({                                                                             \
	struct sock *sk_;						       \
	int rc_;							       \
									       \
	while ((rc_ = !(condition_))) {					       \
		DEFINE_WAIT_FUNC(wait_, woken_wake_function);	               \
		sk_ = (sock_)->sk;					       \
		rc_ = tipc_sk_sock_err((sock_), timeo_);		       \
		if (rc_)						       \
			break;						       \
		prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE);    \
		release_sock(sk_);					       \
		*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
		sched_annotate_sleep();				               \
		lock_sock(sk_);						       \
		remove_wait_queue(sk_sleep(sk_), &wait_);		       \
	}								       \
	rc_;								       \
})
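
/* Editorial usage sketch (mirrors the callers below, e.g. tipc_sendmcast()):
 *
 *	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
 *	if (unlikely(rc))
 *		return rc;
 *
 * The macro re-evaluates the condition with the socket lock held each time it
 * is woken, and returns a socket/timeout/signal error from tipc_sk_sock_err().
 */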

/**
 * tipc_sk_create - create a TIPC socket
 * @net: network namespace (must be default network)
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
 * @kern: caused by kernel or by userspace?
 *
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
{
	struct tipc_net *tn;
	const struct proto_ops *ops;
	struct sock *sk;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;

	/* Validate arguments */
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	switch (sock->type) {
	case SOCK_STREAM:
		ops = &stream_ops;
		break;
	case SOCK_SEQPACKET:
		ops = &packet_ops;
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
		ops = &msg_ops;
		break;
	default:
		return -EPROTOTYPE;
	}

	/* Allocate socket's protocol area */
	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
	if (sk == NULL)
		return -ENOMEM;

	tsk = tipc_sk(sk);
	tsk->max_pkt = MAX_PKT_DEFAULT;
	INIT_LIST_HEAD(&tsk->publications);
	INIT_LIST_HEAD(&tsk->cong_links);
	msg = &tsk->phdr;
	tn = net_generic(sock_net(sk), tipc_net_id);

	/* Finish initializing socket data structures */
	sock->ops = ops;
	sock_init_data(sock, sk);
	tipc_set_sk_state(sk, TIPC_OPEN);
	if (tipc_sk_insert(tsk)) {
		pr_warn("Socket create failed; port number exhausted\n");
		return -EINVAL;
	}

	/* Ensure tsk is visible before we read own_addr. */
	smp_mb();

	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
		      NAMED_H_SIZE, 0);

	msg_set_origport(msg, tsk->portid);
	setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
	sk->sk_shutdown = 0;
	sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
	sk->sk_data_ready = tipc_data_ready;
	sk->sk_write_space = tipc_write_space;
	sk->sk_destruct = tipc_sock_destruct;
	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
	atomic_set(&tsk->dupl_rcvcnt, 0);

	/* Start out with safe limits until we receive an advertised window */
	tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
	tsk->rcv_win = tsk->snd_win;

	if (tipc_sk_type_connectionless(sk)) {
		tsk_set_unreturnable(tsk, true);
		if (sock->type == SOCK_DGRAM)
			tsk_set_unreliable(tsk, true);
	}

	return 0;
}

static void tipc_sk_callback(struct rcu_head *head)
{
	struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);

	sock_put(&tsk->sk);
}

/* Caller should hold socket lock for the socket. */
static void __tipc_shutdown(struct socket *sock, int error)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	long timeout = CONN_TIMEOUT_DEFAULT;
	u32 dnode = tsk_peer_node(tsk);
	struct sk_buff *skb;

	/* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */
	tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
					    !tsk_conn_cong(tsk)));

	/* Reject all unreceived messages, except on an active connection
	 * (which disconnects locally & sends a 'FIN+' to peer).
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (TIPC_SKB_CB(skb)->bytes_read) {
			kfree_skb(skb);
			continue;
		}
		if (!tipc_sk_type_connectionless(sk) &&
		    sk->sk_state != TIPC_DISCONNECTING) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			tipc_node_remove_conn(net, dnode, tsk->portid);
		}
		tipc_sk_respond(sk, skb, error);
	}

	if (tipc_sk_type_connectionless(sk))
		return;

	if (sk->sk_state != TIPC_DISCONNECTING) {
		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
				      TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
				      tsk_own_node(tsk), tsk_peer_port(tsk),
				      tsk->portid, error);
		if (skb)
			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
		tipc_node_remove_conn(net, dnode, tsk->portid);
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
	}
}

/**
 * tipc_release - destroy a TIPC socket
 * @sock: socket to destroy
 *
 * This routine cleans up any messages that are still queued on the socket.
 * For DGRAM and RDM socket types, all queued messages are rejected.
 * For SEQPACKET and STREAM socket types, the first message is rejected
 * and any others are discarded.  (If the first message on a STREAM socket
 * is partially-read, it is discarded and the next one is rejected instead.)
 *
 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 * are returned or discarded according to the "destination droppable" setting
 * specified for the message by the sender.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk;

	/*
	 * Exit if socket isn't fully initialized (occurs when a failed accept()
	 * releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
		return 0;

	tsk = tipc_sk(sk);
	lock_sock(sk);

	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
	sk->sk_shutdown = SHUTDOWN_MASK;
	tipc_sk_leave(tsk);
	tipc_sk_withdraw(tsk, 0, NULL);
	sk_stop_timer(sk, &sk->sk_timer);
	tipc_sk_remove(tsk);

	/* Reject any messages that accumulated in backlog queue */
	release_sock(sk);
	tipc_dest_list_purge(&tsk->cong_links);
	tsk->cong_link_cnt = 0;
	call_rcu(&tsk->rcu, tipc_sk_callback);
	sock->sk = NULL;

	return 0;
}

/**
 * tipc_bind - associate or disassociate TIPC name(s) with a socket
 * @sock: socket structure
 * @uaddr: socket address describing name(s) and desired operation
 * @uaddr_len: size of socket address data structure
 *
 * Name and name sequence binding is indicated using a positive scope value;
 * a negative scope value unbinds the specified name.  Specifying no name
 * (i.e. a socket address length of 0) unbinds all names from the socket.
 *
 * Returns 0 on success, errno otherwise
 *
 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 *       access any non-constant socket information.
 */
static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
		     int uaddr_len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
	struct tipc_sock *tsk = tipc_sk(sk);
	int res = -EINVAL;

	lock_sock(sk);
	if (unlikely(!uaddr_len)) {
		res = tipc_sk_withdraw(tsk, 0, NULL);
		goto exit;
	}
	if (tsk->group) {
		res = -EACCES;
		goto exit;
	}
	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
		res = -EINVAL;
		goto exit;
	}
	if (addr->family != AF_TIPC) {
		res = -EAFNOSUPPORT;
		goto exit;
	}

	if (addr->addrtype == TIPC_ADDR_NAME)
		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
	else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
		res = -EAFNOSUPPORT;
		goto exit;
	}

	if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
	    (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
	    (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
		res = -EACCES;
		goto exit;
	}

	res = (addr->scope > 0) ?
		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
exit:
	release_sock(sk);
	return res;
}

/**
 * tipc_getname - get port ID of socket or peer socket
 * @sock: socket structure
 * @uaddr: area for returned socket address
 * @uaddr_len: area for returned length of socket address
 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
 *
 * Returns 0 on success, errno otherwise
 *
 * NOTE: This routine doesn't need to take the socket lock since it only
 *       accesses socket information that is unchanging (or which changes in
 *       a completely predictable manner).
 */
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
			int *uaddr_len, int peer)
{
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);

	memset(addr, 0, sizeof(*addr));
	if (peer) {
		if ((!tipc_sk_connected(sk)) &&
		    ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
			return -ENOTCONN;
		addr->addr.id.ref = tsk_peer_port(tsk);
		addr->addr.id.node = tsk_peer_node(tsk);
	} else {
		addr->addr.id.ref = tsk->portid;
		addr->addr.id.node = tn->own_addr;
	}

	*uaddr_len = sizeof(*addr);
	addr->addrtype = TIPC_ADDR_ID;
	addr->family = AF_TIPC;
	addr->scope = 0;
	addr->addr.name.domain = 0;

	return 0;
}

/**
 * tipc_poll - read and possibly block on pollmask
 * @file: file structure associated with the socket
 * @sock: socket for which to calculate the poll bits
 * @wait: ???
 *
 * Returns pollmask value
 *
 * COMMENTARY:
 * It appears that the usual socket locking mechanisms are not useful here
 * since the pollmask info is potentially out-of-date the moment this routine
 * exits.  TCP and other protocols seem to rely on higher level poll routines
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 * imply that the operation will succeed, merely that it should be performed
 * and will not block.
 */
static unsigned int tipc_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	u32 revents = 0;

	sock_poll_wait(file, sk_sleep(sk), wait);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		revents |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		revents |= POLLHUP;

	switch (sk->sk_state) {
	case TIPC_ESTABLISHED:
		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
			revents |= POLLOUT;
		/* fall thru' */
	case TIPC_LISTEN:
	case TIPC_CONNECTING:
		if (skb)
			revents |= POLLIN | POLLRDNORM;
		break;
	case TIPC_OPEN:
		if (!grp || tipc_group_size(grp))
			if (!tsk->cong_link_cnt)
				revents |= POLLOUT;
		if (!tipc_sk_type_connectionless(sk))
			break;
		if (!skb)
			break;
		revents |= POLLIN | POLLRDNORM;
		break;
	case TIPC_DISCONNECTING:
		revents = POLLIN | POLLRDNORM | POLLHUP;
		break;
	}
	return revents;
}

/**
 * tipc_sendmcast - send multicast message
 * @sock: socket structure
 * @seq: destination address
 * @msg: message to send
 * @dlen: length of data to send
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
			  struct msghdr *msg, size_t dlen, long timeout)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	int mtu = tipc_bcast_get_mtu(net);
	struct tipc_mc_method *method = &tsk->mc_method;
	u32 domain = addr_domain(net, TIPC_CLUSTER_SCOPE);
	struct sk_buff_head pkts;
	struct tipc_nlist dsts;
	int rc;

	if (tsk->group)
		return -EACCES;

	/* Block or return if any destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;

	/* Lookup destination nodes */
	tipc_nlist_init(&dsts, tipc_own_addr(net));
	tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
				      seq->upper, domain, &dsts);
	if (!dsts.local && !dsts.remote)
		return -EHOSTUNREACH;

	/* Build message header */
	msg_set_type(hdr, TIPC_MCAST_MSG);
	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_nametype(hdr, seq->type);
	msg_set_namelower(hdr, seq->lower);
	msg_set_nameupper(hdr, seq->upper);

	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);

	/* Send message if build was successful */
	if (unlikely(rc == dlen))
		rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
				     &tsk->cong_link_cnt);

	tipc_nlist_purge(&dsts);

	return rc ? rc : dlen;
}

/**
 * tipc_send_group_msg - send a message to a member in the group
 * @net: network namespace
 * @m: message to send
 * @mb: group member
 * @dnode: destination node
 * @dport: destination port
 * @dlen: total length of message data
 */
static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
			       struct msghdr *m, struct tipc_member *mb,
			       u32 dnode, u32 dport, int dlen)
{
	u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
	struct tipc_mc_method *method = &tsk->mc_method;
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_msg *hdr = &tsk->phdr;
	struct sk_buff_head pkts;
	int mtu, rc;

	/* Complete message header */
	msg_set_type(hdr, TIPC_GRP_UCAST_MSG);
	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
	msg_set_destport(hdr, dport);
	msg_set_destnode(hdr, dnode);
	msg_set_grp_bc_seqno(hdr, bc_snd_nxt);

	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	/* Send message */
	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
		tipc_dest_push(&tsk->cong_links, dnode, 0);
		tsk->cong_link_cnt++;
	}

	/* Update send window */
	tipc_group_update_member(mb, blks);

	/* A broadcast sent within next EXPIRE period must follow same path */
	method->rcast = true;
	method->mandatory = true;
	return dlen;
}

/**
 * tipc_send_group_unicast - send message to a member in the group
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
				   int dlen, long timeout)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct net *net = sock_net(sk);
	struct tipc_member *mb = NULL;
	u32 node, port;
	int rc;

	node = dest->addr.id.node;
	port = dest->addr.id.ref;
	if (!port && !node)
		return -EHOSTUNREACH;

	/* Block or return if destination link or member is congested */
	rc = tipc_wait_for_cond(sock, &timeout,
				!tipc_dest_find(&tsk->cong_links, node, 0) &&
				!tipc_group_cong(grp, node, port, blks, &mb));
	if (unlikely(rc))
		return rc;

	if (unlikely(!mb))
		return -EHOSTUNREACH;

	rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen);

	return rc ? rc : dlen;
}

/**
 * tipc_send_group_anycast - send message to any member with given identity
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
				   int dlen, long timeout)
{
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct list_head *cong_links = &tsk->cong_links;
	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
	struct tipc_group *grp = tsk->group;
	struct tipc_member *first = NULL;
	struct tipc_member *mbr = NULL;
	struct net *net = sock_net(sk);
	u32 node, port, exclude;
	u32 type, inst, domain;
	struct list_head dsts;
	int lookups = 0;
	int dstcnt, rc;
	bool cong;

	INIT_LIST_HEAD(&dsts);

	type = dest->addr.name.name.type;
	inst = dest->addr.name.name.instance;
	domain = addr_domain(net, dest->scope);
	exclude = tipc_group_exclude(grp);

	while (++lookups < 4) {
		first = NULL;

		/* Look for a non-congested destination member, if any */
		while (1) {
			if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts,
						 &dstcnt, exclude, false))
				return -EHOSTUNREACH;
			tipc_dest_pop(&dsts, &node, &port);
			cong = tipc_group_cong(grp, node, port, blks, &mbr);
			if (!cong)
				break;
			if (mbr == first)
				break;
			if (!first)
				first = mbr;
		}

		/* Start over if destination was not in member list */
		if (unlikely(!mbr))
			continue;

		if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
			break;

		/* Block or return if destination link or member is congested */
		rc = tipc_wait_for_cond(sock, &timeout,
					!tipc_dest_find(cong_links, node, 0) &&
					!tipc_group_cong(grp, node, port,
							 blks, &mbr));
		if (unlikely(rc))
			return rc;

		/* Send, unless destination disappeared while waiting */
		if (likely(mbr))
			break;
	}

	if (unlikely(lookups >= 4))
		return -EHOSTUNREACH;

	rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);

	return rc ? rc : dlen;
}

/**
 * tipc_send_group_bcast - send message to all members in communication group
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
				 int dlen, long timeout)
{
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_nlist *dsts = tipc_group_dests(grp);
	struct tipc_mc_method *method = &tsk->mc_method;
	bool ack = method->mandatory && method->rcast;
	int blks = tsk_blocks(MCAST_H_SIZE + dlen);
	struct tipc_msg *hdr = &tsk->phdr;
	int mtu = tipc_bcast_get_mtu(net);
	struct sk_buff_head pkts;
	int rc = -EHOSTUNREACH;

	if (!dsts->local && !dsts->remote)
		return -EHOSTUNREACH;

	/* Block or return if any destination link or member is congested */
	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt &&
				!tipc_group_bc_cong(grp, blks));
	if (unlikely(rc))
		return rc;

	/* Complete message header */
	if (dest) {
		msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
		msg_set_nameinst(hdr, dest->addr.name.name.instance);
	} else {
		msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
		msg_set_nameinst(hdr, 0);
	}
	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));

	/* Avoid getting stuck with repeated forced replicasts */
	msg_set_grp_bc_ack_req(hdr, ack);

	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	/* Send message */
	rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;

	/* Update broadcast sequence number and send windows */
	tipc_group_update_bc_members(tsk->group, blks, ack);

	/* Broadcast link is now free to choose method for next broadcast */
	method->mandatory = false;
	method->expires = jiffies;

	return dlen;
}

/**
 * tipc_send_group_mcast - send message to all members with given identity
 * @sock: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
				 int dlen, long timeout)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	struct tipc_name_seq *seq = &dest->addr.nameseq;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct net *net = sock_net(sk);
	u32 domain, exclude, dstcnt;
	struct list_head dsts;

	INIT_LIST_HEAD(&dsts);

	if (seq->lower != seq->upper)
		return -ENOTSUPP;

	domain = addr_domain(net, dest->scope);
	exclude = tipc_group_exclude(grp);
	if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain,
				 &dsts, &dstcnt, exclude, true))
		return -EHOSTUNREACH;

	if (dstcnt == 1) {
		tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
		return tipc_send_group_unicast(sock, m, dlen, timeout);
	}

	tipc_dest_list_purge(&dsts);
	return tipc_send_group_bcast(sock, m, dlen, timeout);
}

/**
 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
 * @arrvq: queue with arriving messages, to be cloned after destination lookup
 * @inputq: queue with cloned messages, delivered to socket after dest lookup
 *
 * Multi-threaded: parallel calls with reference to same queues may occur
 */
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
		       struct sk_buff_head *inputq)
{
	u32 scope = TIPC_CLUSTER_SCOPE;
	u32 self = tipc_own_addr(net);
	struct sk_buff *skb, *_skb;
	u32 lower = 0, upper = ~0;
	struct sk_buff_head tmpq;
	u32 portid, oport, onode;
	struct list_head dports;
	struct tipc_msg *msg;
	int user, mtyp, hsz;

	__skb_queue_head_init(&tmpq);
	INIT_LIST_HEAD(&dports);

	skb = tipc_skb_peek(arrvq, &inputq->lock);
	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
		msg = buf_msg(skb);
		user = msg_user(msg);
		mtyp = msg_type(msg);
		if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
			spin_lock_bh(&inputq->lock);
			if (skb_peek(arrvq) == skb) {
				__skb_dequeue(arrvq);
				__skb_queue_tail(inputq, skb);
			}
			refcount_dec(&skb->users);
			spin_unlock_bh(&inputq->lock);
			continue;
		}
		hsz = skb_headroom(skb) + msg_hdr_sz(msg);
		oport = msg_origport(msg);
		onode = msg_orignode(msg);
		if (onode == self)
			scope = TIPC_NODE_SCOPE;

		/* Create destination port list and message clones: */
		if (!msg_in_group(msg)) {
			lower = msg_namelower(msg);
			upper = msg_nameupper(msg);
		}
		tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper,
					  scope, &dports);
		while (tipc_dest_pop(&dports, NULL, &portid)) {
			_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
			if (_skb) {
				msg_set_destport(buf_msg(_skb), portid);
				__skb_queue_tail(&tmpq, _skb);
				continue;
			}
			pr_warn("Failed to clone mcast rcv buffer\n");
		}
		/* Append to inputq if not already done by other thread */
		spin_lock_bh(&inputq->lock);
		if (skb_peek(arrvq) == skb) {
			skb_queue_splice_tail_init(&tmpq, inputq);
			kfree_skb(__skb_dequeue(arrvq));
		}
		spin_unlock_bh(&inputq->lock);
		__skb_queue_purge(&tmpq);
		kfree_skb(skb);
	}
	tipc_sk_rcv(net, inputq);
}

/**
 * tipc_sk_conn_proto_rcv - receive a connection mng protocol message
 * @tsk: receiving socket
 * @skb: pointer to message buffer.
 */
static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
				   struct sk_buff_head *xmitq)
{
	struct tipc_msg *hdr = buf_msg(skb);
	u32 onode = tsk_own_node(tsk);
	struct sock *sk = &tsk->sk;
	int mtyp = msg_type(hdr);
	bool conn_cong;

	/* Ignore if connection cannot be validated: */
	if (!tsk_peer_msg(tsk, hdr))
		goto exit;

	if (unlikely(msg_errcode(hdr))) {
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
		tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
				      tsk_peer_port(tsk));
		sk->sk_state_change(sk);
		goto exit;
	}

	tsk->probe_unacked = false;

	if (mtyp == CONN_PROBE) {
		msg_set_type(hdr, CONN_PROBE_REPLY);
		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
			__skb_queue_tail(xmitq, skb);
		return;
	} else if (mtyp == CONN_ACK) {
		conn_cong = tsk_conn_cong(tsk);
		tsk->snt_unacked -= msg_conn_ack(hdr);
		if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
			tsk->snd_win = msg_adv_win(hdr);
		if (conn_cong)
			sk->sk_write_space(sk);
	} else if (mtyp != CONN_PROBE_REPLY) {
		pr_warn("Received unknown CONN_PROTO msg\n");
	}
exit:
	kfree_skb(skb);
}

/**
 * tipc_sendmsg - send message in connectionless manner
 * @sock: socket structure
 * @m: message to send
 * @dsz: amount of user data to be sent
 *
 * Message must have a destination specified explicitly.
 * Used for SOCK_RDM and SOCK_DGRAM messages,
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
 *
 * Returns the number of bytes sent on success, or errno otherwise
 */
static int tipc_sendmsg(struct socket *sock,
			struct msghdr *m, size_t dsz)
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_sendmsg(sock, m, dsz);
	release_sock(sk);

	return ret;
}

static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct list_head *clinks = &tsk->cong_links;
	bool syn = !tipc_sk_type_connectionless(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq *seq;
	struct sk_buff_head pkts;
	u32 type, inst, domain;
	u32 dnode, dport;
	int mtu, rc;

	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
		return -EMSGSIZE;

	if (likely(dest)) {
		if (unlikely(m->msg_namelen < sizeof(*dest)))
			return -EINVAL;
		if (unlikely(dest->family != AF_TIPC))
			return -EINVAL;
	}

	if (grp) {
		if (!dest)
			return tipc_send_group_bcast(sock, m, dlen, timeout);
		if (dest->addrtype == TIPC_ADDR_NAME)
			return tipc_send_group_anycast(sock, m, dlen, timeout);
		if (dest->addrtype == TIPC_ADDR_ID)
			return tipc_send_group_unicast(sock, m, dlen, timeout);
		if (dest->addrtype == TIPC_ADDR_MCAST)
			return tipc_send_group_mcast(sock, m, dlen, timeout);
		return -EINVAL;
	}

	if (unlikely(!dest)) {
		dest = &tsk->peer;
		if (!syn || dest->family != AF_TIPC)
			return -EDESTADDRREQ;
	}

	if (unlikely(syn)) {
		if (sk->sk_state == TIPC_LISTEN)
			return -EPIPE;
		if (sk->sk_state != TIPC_OPEN)
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
		if (dest->addrtype == TIPC_ADDR_NAME) {
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
		}
	}

	seq = &dest->addr.nameseq;
	if (dest->addrtype == TIPC_ADDR_MCAST)
		return tipc_sendmcast(sock, seq, m, dlen, timeout);

	if (dest->addrtype == TIPC_ADDR_NAME) {
		type = dest->addr.name.name.type;
		inst = dest->addr.name.name.instance;
		domain = dest->addr.name.domain;
		dnode = domain;
		msg_set_type(hdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
		msg_set_nametype(hdr, type);
		msg_set_nameinst(hdr, inst);
		msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
		dport = tipc_nametbl_translate(net, type, inst, &dnode);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dport);
		if (unlikely(!dport && !dnode))
			return -EHOSTUNREACH;
	} else if (dest->addrtype == TIPC_ADDR_ID) {
		dnode = dest->addr.id.node;
		msg_set_type(hdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(hdr, 0);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dest->addr.id.ref);
		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
	}

	/* Block or return if destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout,
				!tipc_dest_find(clinks, dnode, 0));
	if (unlikely(rc))
		return rc;

	skb_queue_head_init(&pkts);
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
		tipc_dest_push(clinks, dnode, 0);
		tsk->cong_link_cnt++;
		rc = 0;
	}

	if (unlikely(syn && !rc))
		tipc_set_sk_state(sk, TIPC_CONNECTING);

	return rc ? rc : dlen;
}

/**
 * tipc_sendstream - send stream-oriented data
 * @sock: socket structure
 * @m: data to send
 * @dsz: total length of data to be transmitted
 *
 * Used for SOCK_STREAM data.
 *
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
 */
static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_sendstream(sock, m, dsz);
	release_sock(sk);

	return ret;
}

static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	struct sk_buff_head pkts;
	u32 dnode = tsk_peer_node(tsk);
	int send, sent = 0;
	int rc = 0;

	skb_queue_head_init(&pkts);

	if (unlikely(dlen > INT_MAX))
		return -EMSGSIZE;

	/* Handle implicit connection setup */
	if (unlikely(dest)) {
		rc = __tipc_sendmsg(sock, m, dlen);
		if (dlen && (dlen == rc))
			tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
		return rc;
	}

	do {
		rc = tipc_wait_for_cond(sock, &timeout,
					(!tsk->cong_link_cnt &&
					 !tsk_conn_cong(tsk) &&
					 tipc_sk_connected(sk)));
		if (unlikely(rc))
			break;

		send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
		rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
		if (unlikely(rc != send))
			break;

		rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
		if (unlikely(rc == -ELINKCONG)) {
			tsk->cong_link_cnt = 1;
			rc = 0;
		}
		if (likely(!rc)) {
			tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
			sent += send;
		}
	} while (sent < dlen && !rc);

	return sent ? sent : rc;
}

/**
 * tipc_send_packet - send a connection-oriented message
 * @sock: socket structure
 * @m: message to send
 * @dsz: length of data to be transmitted
 *
 * Used for SOCK_SEQPACKET messages.
 *
 * Returns the number of bytes sent on success, or errno otherwise
 */
static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
{
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;

	return tipc_sendstream(sock, m, dsz);
}

/* tipc_sk_finish_conn - complete the setup of a connection
 */
static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
				u32 peer_node)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *msg = &tsk->phdr;

	msg_set_destnode(msg, peer_node);
	msg_set_destport(msg, peer_port);
	msg_set_type(msg, TIPC_CONN_MSG);
	msg_set_lookup_scope(msg, 0);
	msg_set_hdr_sz(msg, SHORT_H_SIZE);

	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
	tipc_set_sk_state(sk, TIPC_ESTABLISHED);
	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
	tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
		return;

	/* Fall back to message based flow control */
	tsk->rcv_win = FLOWCTL_MSG_WIN;
	tsk->snd_win = FLOWCTL_MSG_WIN;
}

/**
 * tipc_sk_set_orig_addr - capture sender's address for received message
 * @m: descriptor for message info
 * @skb: received message buffer
 *
 * Note: Address is not captured if not requested by receiver.
 */
static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
{
	DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name);
	struct tipc_msg *hdr = buf_msg(skb);

	if (!srcaddr)
		return;

	srcaddr->sock.family = AF_TIPC;
	srcaddr->sock.addrtype = TIPC_ADDR_ID;
	srcaddr->sock.addr.id.ref = msg_origport(hdr);
	srcaddr->sock.addr.id.node = msg_orignode(hdr);
	srcaddr->sock.addr.name.domain = 0;
	srcaddr->sock.scope = 0;
	m->msg_namelen = sizeof(struct sockaddr_tipc);

	if (!msg_in_group(hdr))
		return;

	/* Group message users may also want to know sending member's id */
	srcaddr->member.family = AF_TIPC;
	srcaddr->member.addrtype = TIPC_ADDR_NAME;
	srcaddr->member.addr.name.name.type = msg_nametype(hdr);
	srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
	srcaddr->member.addr.name.domain = 0;
	m->msg_namelen = sizeof(*srcaddr);
}

/**
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
 * @m: descriptor for message info
 * @msg: received message header
 * @tsk: TIPC port associated with message
 *
 * Note: Ancillary data is not captured if not requested by receiver.
 *
 * Returns 0 if successful, otherwise errno
 */
static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
				 struct tipc_sock *tsk)
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
	int has_name;
	int res;

	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
			return res;
		if (anc_data[1]) {
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
		break;
	default:
		has_name = 0;
	}
	if (has_name) {
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}

	return 0;
}

static void tipc_sk_send_ack(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct sk_buff *skb = NULL;
	struct tipc_msg *msg;
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);

	if (!tipc_sk_connected(sk))
		return;
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
	if (!skb)
		return;
	msg = buf_msg(skb);
	msg_set_conn_ack(msg, tsk->rcv_unacked);
	tsk->rcv_unacked = 0;

	/* Adjust to and advertize the correct window limit */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
		tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
		msg_set_adv_win(msg, tsk->rcv_win);
	}
	tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
}
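
/* Editorial note: tipc_recvmsg() below calls this once the reader has consumed
 * rcv_win / TIPC_ACK_RATE advertised units, i.e. a quarter of the receive
 * window with TIPC_ACK_RATE == 4, so the peer's send window is re-opened well
 * before it can run dry.
 */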

static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	long timeo = *timeop;
	int err = sock_error(sk);

	if (err)
		return err;

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			if (sk->sk_shutdown & RCV_SHUTDOWN) {
				err = -ENOTCONN;
				break;
			}
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;

		err = sock_error(sk);
		if (err)
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	*timeop = timeo;
	return err;
}

/**
 * tipc_recvmsg - receive packet-oriented message
 * @m: descriptor for message info
 * @buflen: length of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
			size_t buflen,	int flags)
{
	struct sock *sk = sock->sk;
	bool connected = !tipc_sk_type_connectionless(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	int rc, err, hlen, dlen, copy;
	struct sk_buff_head xmitq;
	struct tipc_msg *hdr;
	struct sk_buff *skb;
	bool grp_evt;
	long timeout;

	/* Catch invalid receive requests */
	if (unlikely(!buflen))
		return -EINVAL;

	lock_sock(sk);
	if (unlikely(connected && sk->sk_state == TIPC_OPEN)) {
		rc = -ENOTCONN;
		goto exit;
	}
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	/* Step rcv queue to first msg with data or error; wait if necessary */
	do {
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			goto exit;
		skb = skb_peek(&sk->sk_receive_queue);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);
		grp_evt = msg_is_grp_evt(hdr);
		if (likely(dlen || err))
			break;
		tsk_advance_rx_queue(sk);
	} while (1);

	/* Collect msg meta data, including error code and rejected data */
	tipc_sk_set_orig_addr(m, skb);
	rc = tipc_sk_anc_data_recv(m, hdr, tsk);
	if (unlikely(rc))
		goto exit;

	/* Capture data if non-error msg, otherwise just set return value */
	if (likely(!err)) {
		copy = min_t(int, dlen, buflen);
		if (unlikely(copy != dlen))
			m->msg_flags |= MSG_TRUNC;
		rc = skb_copy_datagram_msg(skb, hlen, m, copy);
	} else {
		copy = 0;
		rc = 0;
		if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
			rc = -ECONNRESET;
	}
	if (unlikely(rc))
		goto exit;

	/* Mark message as group event if applicable */
	if (unlikely(grp_evt)) {
		if (msg_grp_evt(hdr) == TIPC_WITHDRAWN)
			m->msg_flags |= MSG_EOR;
		m->msg_flags |= MSG_OOB;
		copy = 0;
	}

	/* Capture of data or error code/rejected data was successful */
	if (unlikely(flags & MSG_PEEK))
		goto exit;

	/* Send group flow control advertisement when applicable */
	if (tsk->group && msg_in_group(hdr) && !grp_evt) {
		skb_queue_head_init(&xmitq);
		tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
					  msg_orignode(hdr), msg_origport(hdr),
					  &xmitq);
		tipc_node_distr_xmit(sock_net(sk), &xmitq);
	}

	tsk_advance_rx_queue(sk);

	if (likely(!connected))
		goto exit;

	/* Send connection flow control advertisement when applicable */
	tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
	if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
		tipc_sk_send_ack(tsk);
exit:
	release_sock(sk);
	return rc ? rc : copy;
}
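
/*
 * Illustrative user-space sketch (not part of the kernel build): a datagram
 * receiver on a SOCK_RDM socket, matching the truncation behaviour above.
 * The service address (type 18888, instance 17) and variable names are
 * hypothetical examples.
 *
 *	int sd = socket(AF_TIPC, SOCK_RDM, 0);
 *	struct sockaddr_tipc addr = {
 *		.family = AF_TIPC,
 *		.addrtype = TIPC_ADDR_NAMESEQ,
 *		.scope = TIPC_CLUSTER_SCOPE,
 *		.addr.nameseq = { .type = 18888, .lower = 17, .upper = 17 },
 *	};
 *	char buf[66000];
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1 };
 *
 *	bind(sd, (struct sockaddr *)&addr, sizeof(addr));
 *	ssize_t n = recvmsg(sd, &mh, 0);
 *	if (n >= 0 && (mh.msg_flags & MSG_TRUNC))
 *		;	// message was larger than buf and has been truncated
 */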

/**
 * tipc_recvstream - receive stream-oriented data
 * @m: descriptor for message info
 * @buflen: total size of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recvstream(struct socket *sock, struct msghdr *m,
			   size_t buflen, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *skb;
	struct tipc_msg *hdr;
	struct tipc_skb_cb *skb_cb;
	bool peek = flags & MSG_PEEK;
	int offset, required, copy, copied = 0;
	int hlen, dlen, err, rc;
	long timeout;

	/* Catch invalid receive attempts */
	if (unlikely(!buflen))
		return -EINVAL;

	lock_sock(sk);

	if (unlikely(sk->sk_state == TIPC_OPEN)) {
		rc = -ENOTCONN;
		goto exit;
	}
	required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen);
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Look at first msg in receive queue; wait if necessary */
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			break;
		skb = skb_peek(&sk->sk_receive_queue);
		skb_cb = TIPC_SKB_CB(skb);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);

		/* Discard any empty non-errored (SYN-) message */
		if (unlikely(!dlen && !err)) {
			tsk_advance_rx_queue(sk);
			continue;
		}

		/* Collect msg meta data, incl. error code and rejected data */
		if (!copied) {
			tipc_sk_set_orig_addr(m, skb);
			rc = tipc_sk_anc_data_recv(m, hdr, tsk);
			if (rc)
				break;
		}

		/* Copy data if msg ok, otherwise return error/partial data */
		if (likely(!err)) {
			offset = skb_cb->bytes_read;
			copy = min_t(int, dlen - offset, buflen - copied);
			rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
			if (unlikely(rc))
				break;
			copied += copy;
			offset += copy;
			if (unlikely(offset < dlen)) {
				if (!peek)
					skb_cb->bytes_read = offset;
				break;
			}
		} else {
			rc = 0;
			if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control)
				rc = -ECONNRESET;
			if (copied || rc)
				break;
		}

		if (unlikely(peek))
			break;

		tsk_advance_rx_queue(sk);

		/* Send connection flow control advertisement when applicable */
		tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
		if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE))
			tipc_sk_send_ack(tsk);

		/* Exit if all requested data or FIN/error received */
		if (copied == buflen || err)
			break;

	} while (!skb_queue_empty(&sk->sk_receive_queue) || copied < required);
exit:
	release_sock(sk);
	return copied ? copied : rc;
}

/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 */
static void tipc_data_ready(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
						POLLRDNORM | POLLRDBAND);
	rcu_read_unlock();
}

static void tipc_sock_destruct(struct sock *sk)
{
	__skb_queue_purge(&sk->sk_receive_queue);
}

J
Jon Maloy 已提交
1913 1914 1915 1916 1917 1918 1919
static void tipc_sk_proto_rcv(struct sock *sk,
			      struct sk_buff_head *inputq,
			      struct sk_buff_head *xmitq)
{
	struct sk_buff *skb = __skb_dequeue(inputq);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);
J
Jon Maloy 已提交
1920
	struct tipc_group *grp = tsk->group;
1921
	bool wakeup = false;
J
Jon Maloy 已提交
1922 1923 1924 1925 1926 1927

	switch (msg_user(hdr)) {
	case CONN_MANAGER:
		tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
		return;
	case SOCK_WAKEUP:
J
Jon Maloy 已提交
1928
		tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
J
Jon Maloy 已提交
1929
		tsk->cong_link_cnt--;
1930
		wakeup = true;
J
Jon Maloy 已提交
1931
		break;
J
Jon Maloy 已提交
1932
	case GROUP_PROTOCOL:
1933
		tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
J
Jon Maloy 已提交
1934
		break;
J
Jon Maloy 已提交
1935
	case TOP_SRV:
1936 1937
		tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
				      skb, inputq, xmitq);
J
Jon Maloy 已提交
1938
		skb = NULL;
J
Jon Maloy 已提交
1939 1940 1941 1942 1943
		break;
	default:
		break;
	}

1944 1945 1946
	if (wakeup)
		sk->sk_write_space(sk);

J
Jon Maloy 已提交
1947 1948 1949
	kfree_skb(skb);
}

1950
/**
J
Jon Maloy 已提交
1951
 * tipc_filter_connect - Handle incoming message for a connection-based socket
1952
 * @tsk: TIPC socket
1953
 * @skb: pointer to message buffer. Set to NULL if buffer is consumed
1954
 *
1955
 * Returns true if everything ok, false otherwise
1956
 */
J
Jon Maloy 已提交
1957
static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
1958
{
1959
	struct sock *sk = &tsk->sk;
1960
	struct net *net = sock_net(sk);
1961
	struct tipc_msg *hdr = buf_msg(skb);
1962 1963
	u32 pport = msg_origport(hdr);
	u32 pnode = msg_orignode(hdr);
1964

1965 1966
	if (unlikely(msg_mcast(hdr)))
		return false;
1967

1968 1969
	switch (sk->sk_state) {
	case TIPC_CONNECTING:
1970
		/* Accept only ACK or NACK message */
1971 1972 1973 1974 1975 1976 1977 1978 1979 1980
		if (unlikely(!msg_connected(hdr))) {
			if (pport != tsk_peer_port(tsk) ||
			    pnode != tsk_peer_node(tsk))
				return false;

			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = ECONNREFUSED;
			sk->sk_state_change(sk);
			return true;
		}
1981

1982
		if (unlikely(msg_errcode(hdr))) {
1983
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
1984
			sk->sk_err = ECONNREFUSED;
1985
			sk->sk_state_change(sk);
1986
			return true;
1987 1988
		}

1989
		if (unlikely(!msg_isdata(hdr))) {
1990
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
1991
			sk->sk_err = EINVAL;
1992
			sk->sk_state_change(sk);
1993
			return true;
1994 1995
		}

1996 1997
		tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
		msg_set_importance(&tsk->phdr, msg_importance(hdr));
1998

1999 2000 2001 2002 2003
		/* If 'ACK+' message, add to socket receive queue */
		if (msg_data_sz(hdr))
			return true;

		/* If empty 'ACK-' message, wake up sleeping connect() */
2004
		sk->sk_data_ready(sk);
2005 2006 2007 2008 2009

		/* 'ACK-' message is neither accepted nor rejected: */
		msg_set_dest_droppable(hdr, 1);
		return false;

2010
	case TIPC_OPEN:
2011
	case TIPC_DISCONNECTING:
2012 2013
		break;
	case TIPC_LISTEN:
2014
		/* Accept only SYN message */
2015 2016
		if (!msg_connected(hdr) && !(msg_errcode(hdr)))
			return true;
2017
		break;
2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030
	case TIPC_ESTABLISHED:
		/* Accept only connection-based messages sent by peer */
		if (unlikely(!tsk_peer_msg(tsk, hdr)))
			return false;

		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			/* Let timer expire on its own */
			tipc_node_remove_conn(net, tsk_peer_node(tsk),
					      tsk->portid);
			sk->sk_state_change(sk);
		}
		return true;
2031
	default:
2032
		pr_err("Unknown sk_state %u\n", sk->sk_state);
2033
	}
2034

2035
	return false;
2036 2037
}

2038 2039 2040
/**
 * rcvbuf_limit - get proper overload limit of socket receive queue
 * @sk: socket
2041
 * @skb: message
2042
 *
2043 2044
 * For connection oriented messages, irrespective of importance,
 * default queue limit is 2 MB.
2045
 *
2046 2047
 * For connectionless messages, queue limits are based on message
 * importance as follows:
2048
 *
2049 2050 2051 2052
 * TIPC_LOW_IMPORTANCE       (2 MB)
 * TIPC_MEDIUM_IMPORTANCE    (4 MB)
 * TIPC_HIGH_IMPORTANCE      (8 MB)
 * TIPC_CRITICAL_IMPORTANCE  (16 MB)
2053 2054 2055
 *
 * Returns overload limit according to corresponding message importance
 */
2056
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
2057
{
2058 2059 2060
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);

2061 2062 2063
	if (unlikely(msg_in_group(hdr)))
		return sk->sk_rcvbuf;

2064 2065
	if (unlikely(!msg_connected(hdr)))
		return sk->sk_rcvbuf << msg_importance(hdr);
2066

2067 2068
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return sk->sk_rcvbuf;
2069

2070
	return FLOWCTL_MSG_LIM;
2071 2072
}

2073
/**
J
Jon Maloy 已提交
2074
 * tipc_sk_filter_rcv - validate incoming message
2075
 * @sk: socket
2076
 * @skb: pointer to message.
2077
 *
2078 2079 2080
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
2081
 * Called with socket lock already taken
2082
 *
P
Per Liden 已提交
2083
 */
J
Jon Maloy 已提交
2084 2085
static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
			       struct sk_buff_head *xmitq)
P
Per Liden 已提交
2086
{
J
Jon Maloy 已提交
2087
	bool sk_conn = !tipc_sk_type_connectionless(sk);
2088
	struct tipc_sock *tsk = tipc_sk(sk);
J
Jon Maloy 已提交
2089
	struct tipc_group *grp = tsk->group;
2090
	struct tipc_msg *hdr = buf_msg(skb);
J
Jon Maloy 已提交
2091 2092 2093
	struct net *net = sock_net(sk);
	struct sk_buff_head inputq;
	int limit, err = TIPC_OK;
2094

J
Jon Maloy 已提交
2095 2096 2097
	TIPC_SKB_CB(skb)->bytes_read = 0;
	__skb_queue_head_init(&inputq);
	__skb_queue_tail(&inputq, skb);
2098

J
Jon Maloy 已提交
2099 2100
	if (unlikely(!msg_isdata(hdr)))
		tipc_sk_proto_rcv(sk, &inputq, xmitq);
2101

J
Jon Maloy 已提交
2102 2103 2104
	if (unlikely(grp))
		tipc_group_filter_msg(grp, &inputq, xmitq);

J
Jon Maloy 已提交
2105 2106 2107 2108 2109
	/* Validate and add to receive buffer if there is space */
	while ((skb = __skb_dequeue(&inputq))) {
		hdr = buf_msg(skb);
		limit = rcvbuf_limit(sk, skb);
		if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
J
Jon Maloy 已提交
2110 2111
		    (!sk_conn && msg_connected(hdr)) ||
		    (!grp && msg_in_group(hdr)))
2112
			err = TIPC_ERR_NO_PORT;
J
Jon Maloy 已提交
2113 2114
		else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
			err = TIPC_ERR_OVERLOAD;
P
Per Liden 已提交
2115

J
Jon Maloy 已提交
2116 2117 2118 2119 2120 2121 2122 2123
		if (unlikely(err)) {
			tipc_skb_reject(net, err, skb, xmitq);
			err = TIPC_OK;
			continue;
		}
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		skb_set_owner_r(skb, sk);
		sk->sk_data_ready(sk);
2124
	}
2125
}
P
Per Liden 已提交
2126

2127
/**
J
Jon Maloy 已提交
2128
 * tipc_sk_backlog_rcv - handle incoming message from backlog queue
2129
 * @sk: socket
2130
 * @skb: message
2131
 *
2132
 * Caller must hold socket lock
2133
 */
J
Jon Maloy 已提交
2134
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
2135
{
J
Jon Maloy 已提交
2136
	unsigned int before = sk_rmem_alloc_get(sk);
J
Jon Paul Maloy 已提交
2137
	struct sk_buff_head xmitq;
J
Jon Maloy 已提交
2138
	unsigned int added;
2139

J
Jon Paul Maloy 已提交
2140 2141
	__skb_queue_head_init(&xmitq);

J
Jon Maloy 已提交
2142 2143 2144
	tipc_sk_filter_rcv(sk, skb, &xmitq);
	added = sk_rmem_alloc_get(sk) - before;
	atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt);
J
Jon Paul Maloy 已提交
2145

J
Jon Maloy 已提交
2146
	/* Send pending response/rejected messages, if any */
2147
	tipc_node_distr_xmit(sock_net(sk), &xmitq);
2148 2149 2150
	return 0;
}

2151
/**
2152 2153 2154 2155 2156
 * tipc_sk_enqueue - extract all buffers with destination 'dport' from
 *                   inputq and try adding them to socket or backlog queue
 * @inputq: list of incoming buffers with potentially different destinations
 * @sk: socket where the buffers should be enqueued
 * @dport: port number for the socket
2157 2158 2159
 *
 * Caller must hold socket lock
 */
2160
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
J
Jon Paul Maloy 已提交
2161
			    u32 dport, struct sk_buff_head *xmitq)
2162
{
J
Jon Paul Maloy 已提交
2163 2164
	unsigned long time_limit = jiffies + 2;
	struct sk_buff *skb;
2165 2166
	unsigned int lim;
	atomic_t *dcnt;
J
Jon Paul Maloy 已提交
2167
	u32 onode;
2168 2169

	while (skb_queue_len(inputq)) {
2170
		if (unlikely(time_after_eq(jiffies, time_limit)))
2171 2172
			return;

2173 2174
		skb = tipc_skb_dequeue(inputq, dport);
		if (unlikely(!skb))
2175 2176 2177
			return;

		/* Add message directly to receive queue if possible */
2178
		if (!sock_owned_by_user(sk)) {
J
Jon Maloy 已提交
2179
			tipc_sk_filter_rcv(sk, skb, xmitq);
2180
			continue;
2181
		}
2182 2183

		/* Try backlog, compensating for double-counted bytes */
2184
		dcnt = &tipc_sk(sk)->dupl_rcvcnt;
2185
		if (!sk->sk_backlog.len)
2186 2187 2188 2189
			atomic_set(dcnt, 0);
		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
		if (likely(!sk_add_backlog(sk, skb, lim)))
			continue;
2190 2191

		/* Overload => reject message back to sender */
J
Jon Paul Maloy 已提交
2192 2193 2194
		onode = tipc_own_addr(sock_net(sk));
		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
			__skb_queue_tail(xmitq, skb);
2195
		break;
2196
	}
2197 2198
}

2199
/**
2200 2201 2202 2203
 * tipc_sk_rcv - handle a chain of incoming buffers
 * @inputq: buffer list containing the buffers
 * Consumes all buffers in list until inputq is empty
 * Note: may be called in multiple threads referring to the same queue
2204
 */
2205
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
2206
{
J
Jon Paul Maloy 已提交
2207
	struct sk_buff_head xmitq;
2208
	u32 dnode, dport = 0;
E
Erik Hugne 已提交
2209
	int err;
2210 2211
	struct tipc_sock *tsk;
	struct sock *sk;
2212
	struct sk_buff *skb;
2213

J
Jon Paul Maloy 已提交
2214
	__skb_queue_head_init(&xmitq);
2215 2216 2217
	while (skb_queue_len(inputq)) {
		dport = tipc_skb_peek_port(inputq, dport);
		tsk = tipc_sk_lookup(net, dport);
2218

2219 2220 2221
		if (likely(tsk)) {
			sk = &tsk->sk;
			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
J
Jon Paul Maloy 已提交
2222
				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
2223 2224
				spin_unlock_bh(&sk->sk_lock.slock);
			}
J
Jon Paul Maloy 已提交
2225
			/* Send pending response/rejected messages, if any */
2226
			tipc_node_distr_xmit(sock_net(sk), &xmitq);
2227 2228 2229
			sock_put(sk);
			continue;
		}
2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241
		/* No destination socket => dequeue skb if still there */
		skb = tipc_skb_dequeue(inputq, dport);
		if (!skb)
			return;

		/* Try secondary lookup if unresolved named message */
		err = TIPC_ERR_NO_PORT;
		if (tipc_msg_lookup_dest(net, skb, &err))
			goto xmit;

		/* Prepare for message rejection */
		if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
2242
			continue;
2243
xmit:
2244
		dnode = msg_destnode(buf_msg(skb));
2245
		tipc_node_xmit_skb(net, skb, dnode, dport);
2246
	}
P
Per Liden 已提交
2247 2248
}

Y
Ying Xue 已提交
2249 2250
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
W
WANG Cong 已提交
2251
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
Y
Ying Xue 已提交
2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263
	struct sock *sk = sock->sk;
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (!*timeo_p)
			return -ETIMEDOUT;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

W
WANG Cong 已提交
2264
		add_wait_queue(sk_sleep(sk), &wait);
2265
		done = sk_wait_event(sk, timeo_p,
W
WANG Cong 已提交
2266 2267
				     sk->sk_state != TIPC_CONNECTING, &wait);
		remove_wait_queue(sk_sleep(sk), &wait);
Y
Ying Xue 已提交
2268 2269 2270 2271
	} while (!done);
	return 0;
}

P
Per Liden 已提交
2272
/**
2273
 * tipc_connect - establish a connection to another TIPC port
P
Per Liden 已提交
2274 2275 2276
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
2277
 * @flags: file-related flags associated with socket
P
Per Liden 已提交
2278 2279 2280
 *
 * Returns 0 on success, errno otherwise
 */
2281 2282
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
			int destlen, int flags)
P
Per Liden 已提交
2283
{
2284
	struct sock *sk = sock->sk;
2285
	struct tipc_sock *tsk = tipc_sk(sk);
2286 2287
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
2288
	long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
2289
	int previous;
2290
	int res = 0;
2291

2292 2293 2294
	if (destlen != sizeof(struct sockaddr_tipc))
		return -EINVAL;

2295 2296
	lock_sock(sk);

J
Jon Maloy 已提交
2297 2298 2299 2300 2301
	if (tsk->group) {
		res = -EINVAL;
		goto exit;
	}

2302 2303 2304
	if (dst->family == AF_UNSPEC) {
		memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
		if (!tipc_sk_type_connectionless(sk))
2305
			res = -EINVAL;
2306
		goto exit;
2307 2308
	} else if (dst->family != AF_TIPC) {
		res = -EINVAL;
2309
	}
2310
	if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
2311
		res = -EINVAL;
2312 2313 2314 2315 2316 2317
	if (res)
		goto exit;

	/* DGRAM/RDM connect(), just save the destaddr */
	if (tipc_sk_type_connectionless(sk)) {
		memcpy(&tsk->peer, dest, destlen);
2318 2319 2320
		goto exit;
	}

2321
	previous = sk->sk_state;
2322 2323 2324

	switch (sk->sk_state) {
	case TIPC_OPEN:
2325 2326 2327 2328 2329 2330 2331 2332 2333 2334
		/* Send a 'SYN-' to destination */
		m.msg_name = dest;
		m.msg_namelen = destlen;

		/* If connect is in non-blocking case, set MSG_DONTWAIT to
		 * indicate send_msg() is never blocked.
		 */
		if (!timeout)
			m.msg_flags = MSG_DONTWAIT;

2335
		res = __tipc_sendmsg(sock, &m, 0);
2336 2337 2338
		if ((res < 0) && (res != -EWOULDBLOCK))
			goto exit;

2339
		/* Just entered TIPC_CONNECTING state; the only
2340 2341 2342 2343
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		res = -EINPROGRESS;
2344 2345 2346 2347 2348
		/* fall thru' */
	case TIPC_CONNECTING:
		if (!timeout) {
			if (previous == TIPC_CONNECTING)
				res = -EALREADY;
Y
Ying Xue 已提交
2349
			goto exit;
2350
		}
Y
Ying Xue 已提交
2351 2352 2353
		timeout = msecs_to_jiffies(timeout);
		/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
		res = tipc_wait_for_connect(sock, &timeout);
2354 2355
		break;
	case TIPC_ESTABLISHED:
2356
		res = -EISCONN;
2357 2358
		break;
	default:
2359
		res = -EINVAL;
2360
	}
2361

exit:
	release_sock(sk);
	return res;
}
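
/*
 * Illustrative user-space sketch (not part of the kernel build): connecting
 * a SOCK_SEQPACKET socket to a TIPC service name, matching the state machine
 * above. The service address (type 18888, instance 17) is a hypothetical
 * example; a non-blocking socket would see -EINPROGRESS instead of blocking
 * in the TIPC_CONNECTING state.
 *
 *	int sd = socket(AF_TIPC, SOCK_SEQPACKET, 0);
 *	struct sockaddr_tipc peer = {
 *		.family = AF_TIPC,
 *		.addrtype = TIPC_ADDR_NAME,
 *		.addr.name.name = { .type = 18888, .instance = 17 },
 *		.addr.name.domain = 0,	// look up anywhere in the cluster
 *	};
 *
 *	if (connect(sd, (struct sockaddr *)&peer, sizeof(peer)) < 0)
 *		;	// errno may be ECONNREFUSED, ETIMEDOUT, ...
 */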

/**
2368
 * tipc_listen - allow socket to listen for incoming connections
P
Per Liden 已提交
2369 2370
 * @sock: socket structure
 * @len: (unused)
2371
 *
P
Per Liden 已提交
2372 2373
 * Returns 0 on success, errno otherwise
 */
2374
static int tipc_listen(struct socket *sock, int len)
P
Per Liden 已提交
2375
{
2376 2377 2378 2379
	struct sock *sk = sock->sk;
	int res;

	lock_sock(sk);
2380
	res = tipc_set_sk_state(sk, TIPC_LISTEN);
2381
	release_sock(sk);
2382

2383
	return res;
P
Per Liden 已提交
2384 2385
}

Y
Ying Xue 已提交
2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int err;

	/* True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	*/
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
2400
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
Y
Ying Xue 已提交
2401 2402 2403 2404 2405 2406 2407 2408 2409 2410
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
2411 2412 2413
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
Y
Ying Xue 已提交
2414 2415 2416 2417 2418
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

2419
/**
2420
 * tipc_accept - wait for connection request
P
Per Liden 已提交
2421 2422 2423
 * @sock: listening socket
 * @newsock: new socket that is to be connected
 * @flags: file-related flags associated with socket
2424
 *
P
Per Liden 已提交
2425 2426
 * Returns 0 on success, errno otherwise
 */
2427 2428
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
		       bool kern)
P
Per Liden 已提交
2429
{
2430
	struct sock *new_sk, *sk = sock->sk;
P
Per Liden 已提交
2431
	struct sk_buff *buf;
2432
	struct tipc_sock *new_tsock;
2433
	struct tipc_msg *msg;
Y
Ying Xue 已提交
2434
	long timeo;
2435
	int res;
P
Per Liden 已提交
2436

2437
	lock_sock(sk);
P
Per Liden 已提交
2438

2439
	if (sk->sk_state != TIPC_LISTEN) {
2440
		res = -EINVAL;
P
Per Liden 已提交
2441 2442
		goto exit;
	}
Y
Ying Xue 已提交
2443 2444 2445 2446
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	res = tipc_wait_for_accept(sock, timeo);
	if (res)
		goto exit;
2447 2448 2449

	buf = skb_peek(&sk->sk_receive_queue);

2450
	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
2451 2452
	if (res)
		goto exit;
2453
	security_sk_clone(sock->sk, new_sock->sk);
P
Per Liden 已提交
2454

2455
	new_sk = new_sock->sk;
2456
	new_tsock = tipc_sk(new_sk);
2457
	msg = buf_msg(buf);
P
Per Liden 已提交
2458

2459 2460 2461 2462 2463 2464 2465
	/* we lock on new_sk; but lockdep sees the lock on sk */
	lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);

	/*
	 * Reject any stray messages received by new socket
	 * before the socket lock was taken (very, very unlikely)
	 */
2466
	tsk_rej_rx_queue(new_sk);
2467 2468

	/* Connect new socket to its peer */
2469
	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
2470

2471
	tsk_set_importance(new_tsock, msg_importance(msg));
2472
	if (msg_named(msg)) {
2473 2474
		new_tsock->conn_type = msg_nametype(msg);
		new_tsock->conn_instance = msg_nameinst(msg);
P
Per Liden 已提交
2475
	}
2476 2477 2478 2479 2480 2481 2482 2483

	/*
	 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
	 * Respond to 'SYN+' by queuing it on new socket.
	 */
	if (!msg_data_sz(msg)) {
		struct msghdr m = {NULL,};

2484
		tsk_advance_rx_queue(sk);
2485
		__tipc_sendstream(new_sock, &m, 0);
2486 2487 2488
	} else {
		__skb_dequeue(&sk->sk_receive_queue);
		__skb_queue_head(&new_sk->sk_receive_queue, buf);
2489
		skb_set_owner_r(buf, new_sk);
2490 2491
	}
	release_sock(new_sk);
P
Per Liden 已提交
2492
exit:
	release_sock(sk);
	return res;
}
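
/*
 * Illustrative user-space sketch (not part of the kernel build): the
 * listening side that the SYN handling above serves. The bound service
 * address (type 18888, instance 17) and variable names are hypothetical.
 *
 *	int lsd = socket(AF_TIPC, SOCK_SEQPACKET, 0);
 *	struct sockaddr_tipc srv = {
 *		.family = AF_TIPC,
 *		.addrtype = TIPC_ADDR_NAMESEQ,
 *		.scope = TIPC_CLUSTER_SCOPE,
 *		.addr.nameseq = { .type = 18888, .lower = 17, .upper = 17 },
 *	};
 *
 *	bind(lsd, (struct sockaddr *)&srv, sizeof(srv));
 *	listen(lsd, 5);
 *	int csd = accept(lsd, NULL, NULL);	// new connected socket
 */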

/**
2498
 * tipc_shutdown - shutdown socket connection
P
Per Liden 已提交
2499
 * @sock: socket structure
2500
 * @how: direction to close (must be SHUT_RDWR)
P
Per Liden 已提交
2501 2502
 *
 * Terminates connection (if necessary), then purges socket's receive queue.
2503
 *
P
Per Liden 已提交
2504 2505
 * Returns 0 on success, errno otherwise
 */
2506
static int tipc_shutdown(struct socket *sock, int how)
P
Per Liden 已提交
2507
{
2508
	struct sock *sk = sock->sk;
P
Per Liden 已提交
2509 2510
	int res;

2511 2512
	if (how != SHUT_RDWR)
		return -EINVAL;
P
Per Liden 已提交
2513

2514
	lock_sock(sk);
P
Per Liden 已提交
2515

2516 2517
	__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
	sk->sk_shutdown = SEND_SHUTDOWN;
P
Per Liden 已提交
2518

2519
	if (sk->sk_state == TIPC_DISCONNECTING) {
2520
		/* Discard any unreceived messages */
2521
		__skb_queue_purge(&sk->sk_receive_queue);
2522 2523 2524

		/* Wake up anyone sleeping in poll */
		sk->sk_state_change(sk);
P
Per Liden 已提交
2525
		res = 0;
2526
	} else {
P
Per Liden 已提交
2527 2528 2529
		res = -ENOTCONN;
	}

2530
	release_sock(sk);
P
Per Liden 已提交
2531 2532 2533
	return res;
}

2534
static void tipc_sk_timeout(unsigned long data)
2535
{
2536 2537
	struct tipc_sock *tsk = (struct tipc_sock *)data;
	struct sock *sk = &tsk->sk;
2538
	struct sk_buff *skb = NULL;
2539
	u32 peer_port, peer_node;
2540
	u32 own_node = tsk_own_node(tsk);
2541

J
Jon Paul Maloy 已提交
2542
	bh_lock_sock(sk);
2543
	if (!tipc_sk_connected(sk)) {
J
Jon Paul Maloy 已提交
2544 2545
		bh_unlock_sock(sk);
		goto exit;
2546
	}
2547 2548
	peer_port = tsk_peer_port(tsk);
	peer_node = tsk_peer_node(tsk);
2549

2550
	if (tsk->probe_unacked) {
2551
		if (!sock_owned_by_user(sk)) {
2552
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
2553 2554 2555 2556 2557 2558 2559 2560
			tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
					      tsk_peer_port(tsk));
			sk->sk_state_change(sk);
		} else {
			/* Try again later */
			sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
		}

2561 2562
		bh_unlock_sock(sk);
		goto exit;
2563
	}
2564 2565 2566 2567

	skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
			      INT_H_SIZE, 0, peer_node, own_node,
			      peer_port, tsk->portid, TIPC_OK);
2568
	tsk->probe_unacked = true;
2569
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
2570
	bh_unlock_sock(sk);
2571
	if (skb)
2572
		tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
J
Jon Paul Maloy 已提交
2573
exit:
2574
	sock_put(sk);
2575 2576
}

2577
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
2578 2579
			   struct tipc_name_seq const *seq)
{
2580 2581
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
J
Jon Paul Maloy 已提交
2582 2583 2584
	struct publication *publ;
	u32 key;

2585
	if (tipc_sk_connected(sk))
J
Jon Paul Maloy 已提交
2586
		return -EINVAL;
2587 2588
	key = tsk->portid + tsk->pub_count + 1;
	if (key == tsk->portid)
J
Jon Paul Maloy 已提交
2589 2590
		return -EADDRINUSE;

2591
	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
2592
				    scope, tsk->portid, key);
J
Jon Paul Maloy 已提交
2593 2594 2595
	if (unlikely(!publ))
		return -EINVAL;

2596 2597 2598
	list_add(&publ->pport_list, &tsk->publications);
	tsk->pub_count++;
	tsk->published = 1;
J
Jon Paul Maloy 已提交
2599 2600 2601
	return 0;
}

2602
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
J
Jon Paul Maloy 已提交
2603 2604
			    struct tipc_name_seq const *seq)
{
2605
	struct net *net = sock_net(&tsk->sk);
J
Jon Paul Maloy 已提交
2606 2607 2608 2609
	struct publication *publ;
	struct publication *safe;
	int rc = -EINVAL;

2610
	list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
J
Jon Paul Maloy 已提交
2611 2612 2613 2614 2615 2616 2617 2618 2619
		if (seq) {
			if (publ->scope != scope)
				continue;
			if (publ->type != seq->type)
				continue;
			if (publ->lower != seq->lower)
				continue;
			if (publ->upper != seq->upper)
				break;
2620
			tipc_nametbl_withdraw(net, publ->type, publ->lower,
J
Jon Paul Maloy 已提交
2621 2622 2623 2624
					      publ->ref, publ->key);
			rc = 0;
			break;
		}
2625
		tipc_nametbl_withdraw(net, publ->type, publ->lower,
J
Jon Paul Maloy 已提交
2626 2627 2628
				      publ->ref, publ->key);
		rc = 0;
	}
2629 2630
	if (list_empty(&tsk->publications))
		tsk->published = 0;
J
Jon Paul Maloy 已提交
2631 2632 2633
	return rc;
}

2634 2635 2636
/* tipc_sk_reinit: set non-zero address in all existing sockets
 *                 when we go from standalone to network mode.
 */
2637
void tipc_sk_reinit(struct net *net)
2638
{
2639
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2640
	struct rhashtable_iter iter;
2641
	struct tipc_sock *tsk;
2642 2643
	struct tipc_msg *msg;

2644 2645 2646 2647
	rhashtable_walk_enter(&tn->sk_rht, &iter);

	do {
		tsk = ERR_PTR(rhashtable_walk_start(&iter));
2648 2649
		if (IS_ERR(tsk))
			goto walk_stop;
2650 2651

		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
2652 2653
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			msg = &tsk->phdr;
2654 2655
			msg_set_prevnode(msg, tn->own_addr);
			msg_set_orignode(msg, tn->own_addr);
2656 2657
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
2658
walk_stop:
2659 2660
		rhashtable_walk_stop(&iter);
	} while (tsk == ERR_PTR(-EAGAIN));
2661 2662
}

2663
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
2664
{
2665
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2666
	struct tipc_sock *tsk;
2667

2668
	rcu_read_lock();
2669
	tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params);
2670 2671 2672
	if (tsk)
		sock_hold(&tsk->sk);
	rcu_read_unlock();
2673

2674
	return tsk;
2675 2676
}

2677
static int tipc_sk_insert(struct tipc_sock *tsk)
2678
{
2679 2680 2681
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2682 2683
	u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
	u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;
2684

2685 2686 2687 2688 2689 2690
	while (remaining--) {
		portid++;
		if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT))
			portid = TIPC_MIN_PORT;
		tsk->portid = portid;
		sock_hold(&tsk->sk);
2691 2692
		if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
						   tsk_rht_params))
2693 2694
			return 0;
		sock_put(&tsk->sk);
2695 2696
	}

2697
	return -1;
2698 2699
}

2700
static void tipc_sk_remove(struct tipc_sock *tsk)
2701
{
2702
	struct sock *sk = &tsk->sk;
2703
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
2704

2705
	if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
2706
		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
2707
		__sock_put(sk);
2708 2709 2710
	}
}

2711 2712 2713 2714 2715 2716 2717
static const struct rhashtable_params tsk_rht_params = {
	.nelem_hint = 192,
	.head_offset = offsetof(struct tipc_sock, node),
	.key_offset = offsetof(struct tipc_sock, portid),
	.key_len = sizeof(u32), /* portid */
	.max_size = 1048576,
	.min_size = 256,
2718
	.automatic_shrinking = true,
2719 2720
};

2721
int tipc_sk_rht_init(struct net *net)
2722
{
2723
	struct tipc_net *tn = net_generic(net, tipc_net_id);
2724 2725

	return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
2726 2727
}

2728
void tipc_sk_rht_destroy(struct net *net)
2729
{
2730 2731
	struct tipc_net *tn = net_generic(net, tipc_net_id);

2732 2733
	/* Wait for socket readers to complete */
	synchronize_net();
2734

2735
	rhashtable_destroy(&tn->sk_rht);
2736 2737
}

J
Jon Maloy 已提交
2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783
static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
{
	struct net *net = sock_net(&tsk->sk);
	u32 domain = addr_domain(net, mreq->scope);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq seq;
	int rc;

	if (mreq->type < TIPC_RESERVED_TYPES)
		return -EACCES;
	if (grp)
		return -EACCES;
	grp = tipc_group_create(net, tsk->portid, mreq);
	if (!grp)
		return -ENOMEM;
	tsk->group = grp;
	msg_set_lookup_scope(hdr, mreq->scope);
	msg_set_nametype(hdr, mreq->type);
	msg_set_dest_droppable(hdr, true);
	seq.type = mreq->type;
	seq.lower = mreq->instance;
	seq.upper = seq.lower;
	tipc_nametbl_build_group(net, grp, mreq->type, domain);
	rc = tipc_sk_publish(tsk, mreq->scope, &seq);
	if (rc)
		tipc_group_delete(net, grp);
	return rc;
}

static int tipc_sk_leave(struct tipc_sock *tsk)
{
	struct net *net = sock_net(&tsk->sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_name_seq seq;
	int scope;

	if (!grp)
		return -EINVAL;
	tipc_group_self(grp, &seq, &scope);
	tipc_group_delete(net, grp);
	tsk->group = NULL;
	tipc_sk_withdraw(tsk, scope, &seq);
	return 0;
}

P
Per Liden 已提交
2784
/**
2785
 * tipc_setsockopt - set socket option
P
Per Liden 已提交
2786 2787 2788 2789 2790
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: pointer to new option value
 * @ol: length of option value
2791 2792
 *
 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
P
Per Liden 已提交
2793
 * (to ease compatibility).
2794
 *
P
Per Liden 已提交
2795 2796
 * Returns 0 on success, errno otherwise
 */
2797 2798
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, unsigned int ol)
P
Per Liden 已提交
2799
{
2800
	struct sock *sk = sock->sk;
2801
	struct tipc_sock *tsk = tipc_sk(sk);
J
Jon Maloy 已提交
2802
	struct tipc_group_req mreq;
2803
	u32 value = 0;
2804
	int res = 0;
P
Per Liden 已提交
2805

2806 2807
	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return 0;
P
Per Liden 已提交
2808 2809
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
2810 2811 2812 2813 2814 2815 2816 2817

	switch (opt) {
	case TIPC_IMPORTANCE:
	case TIPC_SRC_DROPPABLE:
	case TIPC_DEST_DROPPABLE:
	case TIPC_CONN_TIMEOUT:
		if (ol < sizeof(value))
			return -EINVAL;
J
Jon Maloy 已提交
2818 2819 2820 2821 2822 2823 2824 2825
		if (get_user(value, (u32 __user *)ov))
			return -EFAULT;
		break;
	case TIPC_GROUP_JOIN:
		if (ol < sizeof(mreq))
			return -EINVAL;
		if (copy_from_user(&mreq, ov, sizeof(mreq)))
			return -EFAULT;
2826 2827 2828 2829 2830
		break;
	default:
		if (ov || ol)
			return -EINVAL;
	}
P
Per Liden 已提交
2831

2832
	lock_sock(sk);
2833

P
Per Liden 已提交
2834 2835
	switch (opt) {
	case TIPC_IMPORTANCE:
2836
		res = tsk_set_importance(tsk, value);
P
Per Liden 已提交
2837 2838 2839
		break;
	case TIPC_SRC_DROPPABLE:
		if (sock->type != SOCK_STREAM)
2840
			tsk_set_unreliable(tsk, value);
2841
		else
P
Per Liden 已提交
2842 2843 2844
			res = -ENOPROTOOPT;
		break;
	case TIPC_DEST_DROPPABLE:
2845
		tsk_set_unreturnable(tsk, value);
P
Per Liden 已提交
2846 2847
		break;
	case TIPC_CONN_TIMEOUT:
2848
		tipc_sk(sk)->conn_timeout = value;
P
Per Liden 已提交
2849
		break;
2850 2851 2852 2853 2854 2855 2856 2857
	case TIPC_MCAST_BROADCAST:
		tsk->mc_method.rcast = false;
		tsk->mc_method.mandatory = true;
		break;
	case TIPC_MCAST_REPLICAST:
		tsk->mc_method.rcast = true;
		tsk->mc_method.mandatory = true;
		break;
J
Jon Maloy 已提交
2858 2859 2860 2861 2862 2863
	case TIPC_GROUP_JOIN:
		res = tipc_sk_join(tsk, &mreq);
		break;
	case TIPC_GROUP_LEAVE:
		res = tipc_sk_leave(tsk);
		break;
P
Per Liden 已提交
2864 2865 2866 2867
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	return res;
}
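
/*
 * Illustrative user-space sketch (not part of the kernel build): joining a
 * communication group and setting a connect timeout through the option
 * handling above. Group 4711, member instance 42 and the 5 s timeout are
 * hypothetical values.
 *
 *	struct tipc_group_req mreq = {
 *		.type = 4711,			// group identity
 *		.instance = 42,			// this member's instance
 *		.scope = TIPC_CLUSTER_SCOPE,
 *		.flags = TIPC_GROUP_MEMBER_EVTS,	// deliver join/leave events
 *	};
 *	__u32 tmo = 5000;			// milliseconds
 *
 *	setsockopt(sd, SOL_TIPC, TIPC_GROUP_JOIN, &mreq, sizeof(mreq));
 *	setsockopt(sd, SOL_TIPC, TIPC_CONN_TIMEOUT, &tmo, sizeof(tmo));
 */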

/**
2874
 * tipc_getsockopt - get socket option
P
Per Liden 已提交
2875 2876 2877 2878 2879
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: receptacle for option value
 * @ol: receptacle for length of option value
2880 2881
 *
 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
P
Per Liden 已提交
2882
 * (to ease compatibility).
2883
 *
P
Per Liden 已提交
2884 2885
 * Returns 0 on success, errno otherwise
 */
2886 2887
static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, int __user *ol)
P
Per Liden 已提交
2888
{
2889
	struct sock *sk = sock->sk;
2890
	struct tipc_sock *tsk = tipc_sk(sk);
J
Jon Maloy 已提交
2891 2892
	struct tipc_name_seq seq;
	int len, scope;
P
Per Liden 已提交
2893
	u32 value;
2894
	int res;
P
Per Liden 已提交
2895

2896 2897
	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return put_user(0, ol);
P
Per Liden 已提交
2898 2899
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
2900 2901
	res = get_user(len, ol);
	if (res)
2902
		return res;
P
Per Liden 已提交
2903

2904
	lock_sock(sk);
P
Per Liden 已提交
2905 2906 2907

	switch (opt) {
	case TIPC_IMPORTANCE:
2908
		value = tsk_importance(tsk);
P
Per Liden 已提交
2909 2910
		break;
	case TIPC_SRC_DROPPABLE:
2911
		value = tsk_unreliable(tsk);
P
Per Liden 已提交
2912 2913
		break;
	case TIPC_DEST_DROPPABLE:
2914
		value = tsk_unreturnable(tsk);
P
Per Liden 已提交
2915 2916
		break;
	case TIPC_CONN_TIMEOUT:
2917
		value = tsk->conn_timeout;
2918
		/* no need to set "res", since already 0 at this point */
P
Per Liden 已提交
2919
		break;
2920
	case TIPC_NODE_RECVQ_DEPTH:
2921
		value = 0; /* was tipc_queue_size, now obsolete */
2922
		break;
2923
	case TIPC_SOCK_RECVQ_DEPTH:
2924 2925
		value = skb_queue_len(&sk->sk_receive_queue);
		break;
J
Jon Maloy 已提交
2926 2927 2928 2929 2930 2931
	case TIPC_GROUP_JOIN:
		seq.type = 0;
		if (tsk->group)
			tipc_group_self(tsk->group, &seq, &scope);
		value = seq.type;
		break;
P
Per Liden 已提交
2932 2933 2934 2935
	default:
		res = -EINVAL;
	}

2936 2937
	release_sock(sk);

2938 2939
	if (res)
		return res;	/* "get" failed */
P
Per Liden 已提交
2940

2941 2942 2943 2944 2945 2946 2947
	if (len < sizeof(value))
		return -EINVAL;

	if (copy_to_user(ov, &value, sizeof(value)))
		return -EFAULT;

	return put_user(sizeof(value), ol);
P
Per Liden 已提交
2948 2949
}

2950
static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
E
Erik Hugne 已提交
2951
{
2952
	struct sock *sk = sock->sk;
E
Erik Hugne 已提交
2953 2954 2955 2956 2957 2958 2959
	struct tipc_sioc_ln_req lnr;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case SIOCGETLINKNAME:
		if (copy_from_user(&lnr, argp, sizeof(lnr)))
			return -EFAULT;
2960 2961
		if (!tipc_node_get_linkname(sock_net(sk),
					    lnr.bearer_id & 0xffff, lnr.peer,
E
Erik Hugne 已提交
2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972
					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
			if (copy_to_user(argp, &lnr, sizeof(lnr)))
				return -EFAULT;
			return 0;
		}
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

2973 2974 2975 2976
static int tipc_socketpair(struct socket *sock1, struct socket *sock2)
{
	struct tipc_sock *tsk2 = tipc_sk(sock2->sk);
	struct tipc_sock *tsk1 = tipc_sk(sock1->sk);
E
Erik Hugne 已提交
2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991
	u32 onode = tipc_own_addr(sock_net(sock1->sk));

	tsk1->peer.family = AF_TIPC;
	tsk1->peer.addrtype = TIPC_ADDR_ID;
	tsk1->peer.scope = TIPC_NODE_SCOPE;
	tsk1->peer.addr.id.ref = tsk2->portid;
	tsk1->peer.addr.id.node = onode;
	tsk2->peer.family = AF_TIPC;
	tsk2->peer.addrtype = TIPC_ADDR_ID;
	tsk2->peer.scope = TIPC_NODE_SCOPE;
	tsk2->peer.addr.id.ref = tsk1->portid;
	tsk2->peer.addr.id.node = onode;

	tipc_sk_finish_conn(tsk1, tsk2->portid, onode);
	tipc_sk_finish_conn(tsk2, tsk1->portid, onode);
2992 2993 2994
	return 0;
}

2995 2996
/* Protocol switches for the various types of TIPC sockets */

2997
static const struct proto_ops msg_ops = {
2998
	.owner		= THIS_MODULE,
P
Per Liden 已提交
2999
	.family		= AF_TIPC,
3000 3001 3002
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
E
Erik Hugne 已提交
3003
	.socketpair	= tipc_socketpair,
3004
	.accept		= sock_no_accept,
3005 3006
	.getname	= tipc_getname,
	.poll		= tipc_poll,
E
Erik Hugne 已提交
3007
	.ioctl		= tipc_ioctl,
3008
	.listen		= sock_no_listen,
3009 3010 3011 3012 3013
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendmsg,
	.recvmsg	= tipc_recvmsg,
3014 3015
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
P
Per Liden 已提交
3016 3017
};

3018
static const struct proto_ops packet_ops = {
3019
	.owner		= THIS_MODULE,
P
Per Liden 已提交
3020
	.family		= AF_TIPC,
3021 3022 3023
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
3024
	.socketpair	= tipc_socketpair,
3025 3026 3027
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
E
Erik Hugne 已提交
3028
	.ioctl		= tipc_ioctl,
3029 3030 3031 3032 3033 3034
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_packet,
	.recvmsg	= tipc_recvmsg,
3035 3036
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
P
Per Liden 已提交
3037 3038
};

3039
static const struct proto_ops stream_ops = {
3040
	.owner		= THIS_MODULE,
P
Per Liden 已提交
3041
	.family		= AF_TIPC,
3042 3043 3044
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
3045
	.socketpair	= tipc_socketpair,
3046 3047 3048
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
E
Erik Hugne 已提交
3049
	.ioctl		= tipc_ioctl,
3050 3051 3052 3053
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
3054
	.sendmsg	= tipc_sendstream,
3055
	.recvmsg	= tipc_recvstream,
3056 3057
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
P
Per Liden 已提交
3058 3059
};

3060
static const struct net_proto_family tipc_family_ops = {
3061
	.owner		= THIS_MODULE,
P
Per Liden 已提交
3062
	.family		= AF_TIPC,
3063
	.create		= tipc_sk_create
P
Per Liden 已提交
3064 3065 3066 3067 3068
};

static struct proto tipc_proto = {
	.name		= "TIPC",
	.owner		= THIS_MODULE,
3069 3070
	.obj_size	= sizeof(struct tipc_sock),
	.sysctl_rmem	= sysctl_tipc_rmem
P
Per Liden 已提交
3071 3072 3073
};

/**
3074
 * tipc_socket_init - initialize TIPC socket interface
3075
 *
P
Per Liden 已提交
3076 3077
 * Returns 0 on success, errno otherwise
 */
3078
int tipc_socket_init(void)
P
Per Liden 已提交
3079 3080 3081
{
	int res;

3082
	res = proto_register(&tipc_proto, 1);
P
Per Liden 已提交
3083
	if (res) {
3084
		pr_err("Failed to register TIPC protocol type\n");
P
Per Liden 已提交
3085 3086 3087 3088 3089
		goto out;
	}

	res = sock_register(&tipc_family_ops);
	if (res) {
3090
		pr_err("Failed to register TIPC socket type\n");
P
Per Liden 已提交
3091 3092 3093 3094 3095 3096 3097 3098
		proto_unregister(&tipc_proto);
		goto out;
	}
 out:
	return res;
}

/**
3099
 * tipc_socket_stop - stop TIPC socket interface
P
Per Liden 已提交
3100
 */
3101
void tipc_socket_stop(void)
P
Per Liden 已提交
3102 3103 3104 3105
{
	sock_unregister(tipc_family_ops.family);
	proto_unregister(&tipc_proto);
}
3106 3107

/* Caller should hold socket lock for the passed tipc socket. */
3108
static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142
{
	u32 peer_node;
	u32 peer_port;
	struct nlattr *nest;

	peer_node = tsk_peer_node(tsk);
	peer_port = tsk_peer_port(tsk);

	nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);

	if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
		goto msg_full;
	if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
		goto msg_full;

	if (tsk->conn_type != 0) {
		if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
			goto msg_full;
	}
	nla_nest_end(skb, nest);

	return 0;

msg_full:
	nla_nest_cancel(skb, nest);

	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
3143 3144
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tipc_sock *tsk)
3145 3146 3147 3148
{
	int err;
	void *hdr;
	struct nlattr *attrs;
3149 3150
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
3151
	struct sock *sk = &tsk->sk;
3152 3153

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3154
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
3155 3156 3157 3158 3159 3160
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto genlmsg_cancel;
3161
	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid))
3162
		goto attr_msg_cancel;
3163
	if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
3164 3165
		goto attr_msg_cancel;

3166
	if (tipc_sk_connected(sk)) {
3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190
		err = __tipc_nl_add_sk_con(skb, tsk);
		if (err)
			goto attr_msg_cancel;
	} else if (!list_empty(&tsk->publications)) {
		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
			goto attr_msg_cancel;
	}
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	struct tipc_sock *tsk;
3191 3192
	const struct bucket_table *tbl;
	struct rhash_head *pos;
3193 3194
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
3195 3196
	u32 tbl_id = cb->args[0];
	u32 prev_portid = cb->args[1];
3197

3198
	rcu_read_lock();
3199
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
3200 3201
	for (; tbl_id < tbl->size; tbl_id++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) {
3202
			spin_lock_bh(&tsk->sk.sk_lock.slock);
3203 3204 3205 3206 3207
			if (prev_portid && prev_portid != tsk->portid) {
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				continue;
			}

3208
			err = __tipc_nl_add_sk(skb, cb, tsk);
3209 3210 3211 3212 3213 3214
			if (err) {
				prev_portid = tsk->portid;
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				goto out;
			}
			prev_portid = 0;
3215 3216
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
3217
	}
3218
out:
3219
	rcu_read_unlock();
3220 3221
	cb->args[0] = tbl_id;
	cb->args[1] = prev_portid;
3222 3223 3224

	return skb->len;
}
3225 3226

/* Caller should hold socket lock for the passed tipc socket. */
3227 3228 3229
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 struct publication *publ)
3230 3231 3232 3233 3234
{
	void *hdr;
	struct nlattr *attrs;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3235
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET);
3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_PUBL);
	if (!attrs)
		goto genlmsg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
3266 3267 3268
static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
				  struct netlink_callback *cb,
				  struct tipc_sock *tsk, u32 *last_publ)
3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308
{
	int err;
	struct publication *p;

	if (*last_publ) {
		list_for_each_entry(p, &tsk->publications, pport_list) {
			if (p->key == *last_publ)
				break;
		}
		if (p->key != *last_publ) {
			/* We never set seq or call nl_dump_check_consistent()
			 * this means that setting prev_seq here will cause the
			 * consistence check to fail in the netlink callback
			 * handler. Resulting in the last NLMSG_DONE message
			 * having the NLM_F_DUMP_INTR flag set.
			 */
			cb->prev_seq = 1;
			*last_publ = 0;
			return -EPIPE;
		}
	} else {
		p = list_first_entry(&tsk->publications, struct publication,
				     pport_list);
	}

	list_for_each_entry_from(p, &tsk->publications, pport_list) {
		err = __tipc_nl_add_sk_publ(skb, cb, p);
		if (err) {
			*last_publ = p->key;
			return err;
		}
	}
	*last_publ = 0;

	return 0;
}

int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
3309
	u32 tsk_portid = cb->args[0];
3310 3311
	u32 last_publ = cb->args[1];
	u32 done = cb->args[2];
3312
	struct net *net = sock_net(skb->sk);
3313 3314
	struct tipc_sock *tsk;

3315
	if (!tsk_portid) {
3316 3317 3318 3319 3320 3321 3322
		struct nlattr **attrs;
		struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];

		err = tipc_nlmsg_parse(cb->nlh, &attrs);
		if (err)
			return err;

3323 3324 3325
		if (!attrs[TIPC_NLA_SOCK])
			return -EINVAL;

3326 3327
		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
				       attrs[TIPC_NLA_SOCK],
3328
				       tipc_nl_sock_policy, NULL);
3329 3330 3331 3332 3333 3334
		if (err)
			return err;

		if (!sock[TIPC_NLA_SOCK_REF])
			return -EINVAL;

3335
		tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]);
3336 3337 3338 3339 3340
	}

	if (done)
		return 0;

3341
	tsk = tipc_sk_lookup(net, tsk_portid);
3342 3343 3344 3345 3346 3347 3348 3349
	if (!tsk)
		return -EINVAL;

	lock_sock(&tsk->sk);
	err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ);
	if (!err)
		done = 1;
	release_sock(&tsk->sk);
3350
	sock_put(&tsk->sk);
3351

3352
	cb->args[0] = tsk_portid;
3353 3354 3355 3356 3357
	cb->args[1] = last_publ;
	cb->args[2] = done;

	return skb->len;
}