/*
 * net/tipc/socket.c: TIPC socket API
 *
 * Copyright (c) 2001-2007, 2012-2017, Ericsson AB
 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/rhashtable.h>
#include <linux/sched/signal.h>

#include "core.h"
#include "name_table.h"
#include "node.h"
#include "link.h"
#include "name_distr.h"
#include "socket.h"
#include "bcast.h"
#include "netlink.h"
#include "group.h"

#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
#define CONN_PROBING_INTERVAL	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
#define TIPC_FWD_MSG		1
#define TIPC_MAX_PORT		0xffffffff
#define TIPC_MIN_PORT		1
#define TIPC_ACK_RATE		4       /* ACK at 1/4 of rcv window size */

enum {
	TIPC_LISTEN = TCP_LISTEN,
	TIPC_ESTABLISHED = TCP_ESTABLISHED,
	TIPC_OPEN = TCP_CLOSE,
	TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
	TIPC_CONNECTING = TCP_SYN_SENT,
};

/**
 * struct tipc_sock - TIPC socket structure
 * @sk: socket - interacts with 'port' and with user via the socket API
 * @conn_type: TIPC type used when connection was established
 * @conn_instance: TIPC instance used when connection was established
 * @published: non-zero if port has one or more associated names
 * @max_pkt: maximum packet size "hint" used when building messages sent by port
 * @portid: unique port identity in TIPC socket hash table
 * @phdr: preformatted message header used when sending messages
 * @cong_links: list of congested links
 * @publications: list of publications for port
 * @pub_count: total # of publications port has made during its lifetime
 * @conn_timeout: the time we can wait for an unresponded setup request
 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
 * @probe_unacked: last connection probe has not yet been acknowledged
 * @cong_link_cnt: number of congested links
 * @snt_unacked: # messages sent by socket, and not yet acked by peer
 * @snd_win: send window advertised by the peer
 * @peer_caps: capabilities advertised by the peer node
 * @rcv_unacked: # messages read by user, but not yet acked back to peer
 * @rcv_win: receive window advertised to the peer
 * @peer: 'connected' peer for dgram/rdm
 * @node: hash table node
 * @mc_method: cookie for use between socket and broadcast layer
 * @rcu: rcu struct for tipc_sock
 * @group: communication group the socket has joined, if any
 */
struct tipc_sock {
	struct sock sk;
	u32 conn_type;
	u32 conn_instance;
	int published;
	u32 max_pkt;
	u32 portid;
	struct tipc_msg phdr;
	struct list_head cong_links;
	struct list_head publications;
	u32 pub_count;
	uint conn_timeout;
	atomic_t dupl_rcvcnt;
	bool probe_unacked;
	u16 cong_link_cnt;
	u16 snt_unacked;
	u16 snd_win;
	u16 peer_caps;
	u16 rcv_unacked;
	u16 rcv_win;
	struct sockaddr_tipc peer;
	struct rhash_head node;
	struct tipc_mc_method mc_method;
	struct rcu_head rcu;
	struct tipc_group *group;
};

static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
		       bool kern);
static void tipc_sk_timeout(unsigned long data);
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq const *seq);
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq const *seq);
static int tipc_sk_leave(struct tipc_sock *tsk);
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);

static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
static struct proto tipc_proto;
static const struct rhashtable_params tsk_rht_params;

static u32 tsk_own_node(struct tipc_sock *tsk)
{
	return msg_prevnode(&tsk->phdr);
}

static u32 tsk_peer_node(struct tipc_sock *tsk)
{
	return msg_destnode(&tsk->phdr);
}

static u32 tsk_peer_port(struct tipc_sock *tsk)
{
	return msg_destport(&tsk->phdr);
}

static bool tsk_unreliable(struct tipc_sock *tsk)
{
	return msg_src_droppable(&tsk->phdr) != 0;
}

static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
{
	msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0);
}

static bool tsk_unreturnable(struct tipc_sock *tsk)
{
	return msg_dest_droppable(&tsk->phdr) != 0;
}

static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
{
	msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0);
}

static int tsk_importance(struct tipc_sock *tsk)
{
	return msg_importance(&tsk->phdr);
}

static int tsk_set_importance(struct tipc_sock *tsk, int imp)
{
	if (imp > TIPC_CRITICAL_IMPORTANCE)
		return -EINVAL;
	msg_set_importance(&tsk->phdr, (u32)imp);
	return 0;
}

static struct tipc_sock *tipc_sk(const struct sock *sk)
{
	return container_of(sk, struct tipc_sock, sk);
}

static bool tsk_conn_cong(struct tipc_sock *tsk)
{
	return tsk->snt_unacked > tsk->snd_win;
}

/* tsk_adv_blocks(): translate a buffer size in bytes to number of
 * advertisable blocks, taking into account the ratio truesize(len)/len
 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
 */
static u16 tsk_adv_blocks(int len)
{
	return len / FLOWCTL_BLK_SZ / 4;
}

/* tsk_inc(): increment counter for sent or received data
 * - If block based flow control is not supported by peer we
 *   fall back to message based flow control, incrementing the counter
 */
static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
{
	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return ((msglen / FLOWCTL_BLK_SZ) + 1);
	return 1;
}
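/* Illustrative sketch (not part of the original file): how the helpers above
 * combine on a connection's send path.  Assuming FLOWCTL_BLK_SZ is 1024 and
 * the peer advertises TIPC_BLOCK_FLOWCTL, a 5000-byte message costs
 * (5000 / 1024) + 1 = 5 advertised blocks; without that capability every
 * message costs exactly one unit.  A sender typically does:
 *
 *	rc = tipc_wait_for_cond(sock, &timeout, !tsk_conn_cong(tsk));
 *	...
 *	tsk->snt_unacked += tsk_inc(tsk, msglen + MIN_H_SIZE);
 *
 * and the peer's CONN_ACK later subtracts the acknowledged amount from
 * snt_unacked (see tipc_sk_conn_proto_rcv() below).
 */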

/**
 * tsk_advance_rx_queue - discard first buffer in socket receive queue
 *
 * Caller must hold socket lock
 */
static void tsk_advance_rx_queue(struct sock *sk)
{
	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
}

/* tipc_sk_respond() : send response message back to sender
 */
static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
{
	u32 selector;
	u32 dnode;
	u32 onode = tipc_own_addr(sock_net(sk));

	if (!tipc_msg_reverse(onode, &skb, err))
		return;

	dnode = msg_destnode(buf_msg(skb));
	selector = msg_origport(buf_msg(skb));
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
}

/**
 * tsk_rej_rx_queue - reject all buffers in socket receive queue
 *
 * Caller must hold socket lock
 */
static void tsk_rej_rx_queue(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
		tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
}

static bool tipc_sk_connected(struct sock *sk)
{
	return sk->sk_state == TIPC_ESTABLISHED;
}

/* tipc_sk_type_connectionless - check if the socket is datagram socket
 * @sk: socket
 *
 * Returns true if connectionless, false otherwise
 */
static bool tipc_sk_type_connectionless(struct sock *sk)
{
	return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM;
}

/* tsk_peer_msg - verify if message was sent by connected port's peer
 *
 * Handles cases where the node's network address has changed from
 * the default of <0.0.0> to its configured setting.
 */
static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
	u32 peer_port = tsk_peer_port(tsk);
	u32 orig_node;
	u32 peer_node;

	if (unlikely(!tipc_sk_connected(sk)))
		return false;

	if (unlikely(msg_origport(msg) != peer_port))
		return false;

	orig_node = msg_orignode(msg);
	peer_node = tsk_peer_node(tsk);

	if (likely(orig_node == peer_node))
		return true;

	if (!orig_node && (peer_node == tn->own_addr))
		return true;

	if (!peer_node && (orig_node == tn->own_addr))
		return true;

	return false;
}

/* tipc_set_sk_state - set the sk_state of the socket
 * @sk: socket
 *
 * Caller must hold socket lock
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_set_sk_state(struct sock *sk, int state)
{
	int oldsk_state = sk->sk_state;
	int res = -EINVAL;

	switch (state) {
	case TIPC_OPEN:
		res = 0;
		break;
	case TIPC_LISTEN:
	case TIPC_CONNECTING:
		if (oldsk_state == TIPC_OPEN)
			res = 0;
		break;
	case TIPC_ESTABLISHED:
		if (oldsk_state == TIPC_CONNECTING ||
		    oldsk_state == TIPC_OPEN)
			res = 0;
		break;
	case TIPC_DISCONNECTING:
		if (oldsk_state == TIPC_CONNECTING ||
		    oldsk_state == TIPC_ESTABLISHED)
			res = 0;
		break;
	}

	if (!res)
		sk->sk_state = state;

	return res;
}
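/* Summary of the transitions accepted by the switch above (derived from the
 * code, added here only as an illustrative note):
 *
 *	TIPC_OPEN          <- any state (always allowed)
 *	TIPC_LISTEN        <- TIPC_OPEN
 *	TIPC_CONNECTING    <- TIPC_OPEN
 *	TIPC_ESTABLISHED   <- TIPC_CONNECTING or TIPC_OPEN
 *	TIPC_DISCONNECTING <- TIPC_CONNECTING or TIPC_ESTABLISHED
 */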

static int tipc_sk_sock_err(struct socket *sock, long *timeout)
{
	struct sock *sk = sock->sk;
	int err = sock_error(sk);
	int typ = sock->type;

	if (err)
		return err;
	if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) {
		if (sk->sk_state == TIPC_DISCONNECTING)
			return -EPIPE;
		else if (!tipc_sk_connected(sk))
			return -ENOTCONN;
	}
	if (!*timeout)
		return -EAGAIN;
	if (signal_pending(current))
		return sock_intr_errno(*timeout);

	return 0;
}

#define tipc_wait_for_cond(sock_, timeo_, condition_)			       \
({                                                                             \
	struct sock *sk_;						       \
	int rc_;							       \
									       \
	while ((rc_ = !(condition_))) {					       \
		DEFINE_WAIT_FUNC(wait_, woken_wake_function);	               \
		sk_ = (sock_)->sk;					       \
		rc_ = tipc_sk_sock_err((sock_), timeo_);		       \
		if (rc_)						       \
			break;						       \
		prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE);    \
		release_sock(sk_);					       \
		*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
		sched_annotate_sleep();				               \
		lock_sock(sk_);						       \
		remove_wait_queue(sk_sleep(sk_), &wait_);		       \
	}								       \
	rc_;								       \
})
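/* Illustrative example (not part of the original file): a typical caller
 * blocks until no destination link is congested, propagating any socket
 * error, timeout or pending signal reported via tipc_sk_sock_err():
 *
 *	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
 *	if (unlikely(rc))
 *		return rc;
 *
 * This is exactly the pattern used by tipc_sendmcast() and the other send
 * routines further down in this file.
 */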

/**
 * tipc_sk_create - create a TIPC socket
 * @net: network namespace (must be default network)
 * @sock: pre-allocated socket structure
 * @protocol: protocol indicator (must be 0)
 * @kern: caused by kernel or by userspace?
 *
 * This routine creates additional data structures used by the TIPC socket,
 * initializes them, and links them together.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_sk_create(struct net *net, struct socket *sock,
			  int protocol, int kern)
{
	struct tipc_net *tn;
	const struct proto_ops *ops;
	struct sock *sk;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;

	/* Validate arguments */
	if (unlikely(protocol != 0))
		return -EPROTONOSUPPORT;

	switch (sock->type) {
	case SOCK_STREAM:
		ops = &stream_ops;
		break;
	case SOCK_SEQPACKET:
		ops = &packet_ops;
		break;
	case SOCK_DGRAM:
	case SOCK_RDM:
		ops = &msg_ops;
		break;
	default:
		return -EPROTOTYPE;
	}

	/* Allocate socket's protocol area */
	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
	if (sk == NULL)
		return -ENOMEM;

	tsk = tipc_sk(sk);
	tsk->max_pkt = MAX_PKT_DEFAULT;
	INIT_LIST_HEAD(&tsk->publications);
	INIT_LIST_HEAD(&tsk->cong_links);
	msg = &tsk->phdr;
	tn = net_generic(sock_net(sk), tipc_net_id);

	/* Finish initializing socket data structures */
	sock->ops = ops;
	sock_init_data(sock, sk);
	tipc_set_sk_state(sk, TIPC_OPEN);
	if (tipc_sk_insert(tsk)) {
		pr_warn("Socket create failed; port number exhausted\n");
		return -EINVAL;
	}

	/* Ensure tsk is visible before we read own_addr. */
	smp_mb();

	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
		      NAMED_H_SIZE, 0);

	msg_set_origport(msg, tsk->portid);
	setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
	sk->sk_shutdown = 0;
	sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
	sk->sk_data_ready = tipc_data_ready;
	sk->sk_write_space = tipc_write_space;
	sk->sk_destruct = tipc_sock_destruct;
	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
	atomic_set(&tsk->dupl_rcvcnt, 0);

	/* Start out with safe limits until we receive an advertised window */
	tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
	tsk->rcv_win = tsk->snd_win;

	if (tipc_sk_type_connectionless(sk)) {
		tsk_set_unreturnable(tsk, true);
		if (sock->type == SOCK_DGRAM)
			tsk_set_unreliable(tsk, true);
	}

	return 0;
}
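/* Illustrative user-space sketch (not part of the original file): this
 * function is reached through the AF_TIPC family handler whenever an
 * application opens a TIPC socket, e.g.:
 *
 *	int sd = socket(AF_TIPC, SOCK_RDM, 0);
 *
 * SOCK_RDM and SOCK_DGRAM sockets are bound to msg_ops, SOCK_SEQPACKET to
 * packet_ops and SOCK_STREAM to stream_ops, as selected in the switch
 * statement above.
 */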

static void tipc_sk_callback(struct rcu_head *head)
{
	struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);

	sock_put(&tsk->sk);
}

/* Caller should hold socket lock for the socket. */
static void __tipc_shutdown(struct socket *sock, int error)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct net *net = sock_net(sk);
	long timeout = CONN_TIMEOUT_DEFAULT;
	u32 dnode = tsk_peer_node(tsk);
	struct sk_buff *skb;

	/* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */
	tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
					    !tsk_conn_cong(tsk)));

	/* Reject all unreceived messages, except on an active connection
	 * (which disconnects locally & sends a 'FIN+' to peer).
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (TIPC_SKB_CB(skb)->bytes_read) {
			kfree_skb(skb);
			continue;
		}
		if (!tipc_sk_type_connectionless(sk) &&
		    sk->sk_state != TIPC_DISCONNECTING) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			tipc_node_remove_conn(net, dnode, tsk->portid);
		}
		tipc_sk_respond(sk, skb, error);
	}

	if (tipc_sk_type_connectionless(sk))
		return;

	if (sk->sk_state != TIPC_DISCONNECTING) {
		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
				      TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
				      tsk_own_node(tsk), tsk_peer_port(tsk),
				      tsk->portid, error);
		if (skb)
			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
		tipc_node_remove_conn(net, dnode, tsk->portid);
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
	}
}

/**
 * tipc_release - destroy a TIPC socket
 * @sock: socket to destroy
 *
 * This routine cleans up any messages that are still queued on the socket.
 * For DGRAM and RDM socket types, all queued messages are rejected.
 * For SEQPACKET and STREAM socket types, the first message is rejected
 * and any others are discarded.  (If the first message on a STREAM socket
 * is partially-read, it is discarded and the next one is rejected instead.)
 *
 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 * are returned or discarded according to the "destination droppable" setting
 * specified for the message by the sender.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk;

	/*
	 * Exit if socket isn't fully initialized (occurs when a failed accept()
	 * releases a pre-allocated child socket that was never used)
	 */
	if (sk == NULL)
		return 0;

	tsk = tipc_sk(sk);
	lock_sock(sk);

	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
	sk->sk_shutdown = SHUTDOWN_MASK;
	tipc_sk_leave(tsk);
	tipc_sk_withdraw(tsk, 0, NULL);
	sk_stop_timer(sk, &sk->sk_timer);
	tipc_sk_remove(tsk);

	/* Reject any messages that accumulated in backlog queue */
	release_sock(sk);
	tipc_dest_list_purge(&tsk->cong_links);
	tsk->cong_link_cnt = 0;
	call_rcu(&tsk->rcu, tipc_sk_callback);
	sock->sk = NULL;

	return 0;
}

/**
 * tipc_bind - associate or disassociate TIPC name(s) with a socket
 * @sock: socket structure
 * @uaddr: socket address describing name(s) and desired operation
 * @uaddr_len: size of socket address data structure
 *
 * Name and name sequence binding is indicated using a positive scope value;
 * a negative scope value unbinds the specified name.  Specifying no name
 * (i.e. a socket address length of 0) unbinds all names from the socket.
 *
 * Returns 0 on success, errno otherwise
 *
 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 *       access any non-constant socket information.
 */
static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
		     int uaddr_len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
	struct tipc_sock *tsk = tipc_sk(sk);
	int res = -EINVAL;

	lock_sock(sk);
	if (unlikely(!uaddr_len)) {
		res = tipc_sk_withdraw(tsk, 0, NULL);
		goto exit;
	}
	if (tsk->group) {
		res = -EACCES;
		goto exit;
	}
	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
		res = -EINVAL;
		goto exit;
	}
	if (addr->family != AF_TIPC) {
		res = -EAFNOSUPPORT;
		goto exit;
	}

	if (addr->addrtype == TIPC_ADDR_NAME)
		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
	else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
		res = -EAFNOSUPPORT;
		goto exit;
	}

	if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
	    (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
	    (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
		res = -EACCES;
		goto exit;
	}

	res = (addr->scope > 0) ?
		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
exit:
	release_sock(sk);
	return res;
}
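/* Illustrative user-space sketch (not part of the original file, assuming the
 * usual <linux/tipc.h> definitions of sockaddr_tipc and TIPC_ADDR_NAMESEQ):
 * publishing name type 1000, instances 0-99, with cluster scope reaches this
 * function via the generic bind() path:
 *
 *	struct sockaddr_tipc a = {0};
 *	a.family = AF_TIPC;
 *	a.addrtype = TIPC_ADDR_NAMESEQ;
 *	a.scope = TIPC_CLUSTER_SCOPE;
 *	a.addr.nameseq.type = 1000;
 *	a.addr.nameseq.lower = 0;
 *	a.addr.nameseq.upper = 99;
 *	bind(sd, (struct sockaddr *)&a, sizeof(a));
 *
 * A negative scope in the same call unbinds the name again, as described in
 * the kernel-doc above.
 */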

/**
 * tipc_getname - get port ID of socket or peer socket
 * @sock: socket structure
 * @uaddr: area for returned socket address
 * @uaddr_len: area for returned length of socket address
 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
 *
 * Returns 0 on success, errno otherwise
 *
 * NOTE: This routine doesn't need to take the socket lock since it only
 *       accesses socket information that is unchanging (or which changes in
 *       a completely predictable manner).
 */
static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
			int *uaddr_len, int peer)
{
	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);

	memset(addr, 0, sizeof(*addr));
	if (peer) {
		if ((!tipc_sk_connected(sk)) &&
		    ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
			return -ENOTCONN;
		addr->addr.id.ref = tsk_peer_port(tsk);
		addr->addr.id.node = tsk_peer_node(tsk);
	} else {
		addr->addr.id.ref = tsk->portid;
		addr->addr.id.node = tn->own_addr;
	}

	*uaddr_len = sizeof(*addr);
	addr->addrtype = TIPC_ADDR_ID;
	addr->family = AF_TIPC;
	addr->scope = 0;
	addr->addr.name.domain = 0;

	return 0;
}

/**
 * tipc_poll - read and possibly block on pollmask
 * @file: file structure associated with the socket
 * @sock: socket for which to calculate the poll bits
 * @wait: poll table
 *
 * Returns pollmask value
 *
 * COMMENTARY:
 * It appears that the usual socket locking mechanisms are not useful here
 * since the pollmask info is potentially out-of-date the moment this routine
 * exits.  TCP and other protocols seem to rely on higher level poll routines
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 * imply that the operation will succeed, merely that it should be performed
 * and will not block.
 */
static unsigned int tipc_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	u32 mask = 0;

	sock_poll_wait(file, sk_sleep(sk), wait);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	switch (sk->sk_state) {
	case TIPC_ESTABLISHED:
		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
			mask |= POLLOUT;
		/* fall thru' */
	case TIPC_LISTEN:
	case TIPC_CONNECTING:
		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= (POLLIN | POLLRDNORM);
		break;
	case TIPC_OPEN:
		if (!grp || tipc_group_size(grp))
			if (!tsk->cong_link_cnt)
				mask |= POLLOUT;
		if (tipc_sk_type_connectionless(sk) &&
		    (!skb_queue_empty(&sk->sk_receive_queue)))
			mask |= (POLLIN | POLLRDNORM);
		break;
	case TIPC_DISCONNECTING:
		mask = (POLLIN | POLLRDNORM | POLLHUP);
		break;
	}

	return mask;
}

/**
 * tipc_sendmcast - send multicast message
 * @sock: socket structure
 * @seq: destination address
 * @msg: message to send
 * @dlen: length of data to send
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
			  struct msghdr *msg, size_t dlen, long timeout)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	int mtu = tipc_bcast_get_mtu(net);
	struct tipc_mc_method *method = &tsk->mc_method;
	u32 domain = addr_domain(net, TIPC_CLUSTER_SCOPE);
	struct sk_buff_head pkts;
	struct tipc_nlist dsts;
	int rc;

	if (tsk->group)
		return -EACCES;

	/* Block or return if any destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;

	/* Lookup destination nodes */
	tipc_nlist_init(&dsts, tipc_own_addr(net));
	tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
				      seq->upper, domain, &dsts);
	if (!dsts.local && !dsts.remote)
		return -EHOSTUNREACH;

	/* Build message header */
	msg_set_type(hdr, TIPC_MCAST_MSG);
	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_nametype(hdr, seq->type);
	msg_set_namelower(hdr, seq->lower);
	msg_set_nameupper(hdr, seq->upper);

	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);

	/* Send message if build was successful */
	if (unlikely(rc == dlen))
		rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
				     &tsk->cong_link_cnt);

	tipc_nlist_purge(&dsts);

	return rc ? rc : dlen;
}
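/* Illustrative user-space sketch (not part of the original file, assuming the
 * usual <linux/tipc.h> address definitions): a multicast send that reaches
 * this function uses a TIPC_ADDR_MCAST destination covering an instance
 * range, e.g. type 1000, instances 0-99:
 *
 *	struct sockaddr_tipc dst = {0};
 *	dst.family = AF_TIPC;
 *	dst.addrtype = TIPC_ADDR_MCAST;
 *	dst.addr.nameseq.type = 1000;
 *	dst.addr.nameseq.lower = 0;
 *	dst.addr.nameseq.upper = 99;
 *	sendto(sd, buf, len, 0, (struct sockaddr *)&dst, sizeof(dst));
 */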

/**
 * tipc_send_group_bcast - send message to all members in communication group
 * @sk: socket structure
 * @m: message to send
 * @dlen: total length of message data
 * @timeout: timeout to wait for wakeup
 *
 * Called from function tipc_sendmsg(), which has done all sanity checks
 * Returns the number of bytes sent on success, or errno
 */
static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
				 int dlen, long timeout)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_nlist *dsts = tipc_group_dests(grp);
	struct tipc_mc_method *method = &tsk->mc_method;
	struct tipc_msg *hdr = &tsk->phdr;
	int mtu = tipc_bcast_get_mtu(net);
	struct sk_buff_head pkts;
	int rc = -EHOSTUNREACH;

	if (!dsts->local && !dsts->remote)
		return -EHOSTUNREACH;

	/* Block or return if any destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout,	!tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;

	/* Complete message header */
	msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
	msg_set_destport(hdr, 0);
	msg_set_destnode(hdr, 0);
	msg_set_nameinst(hdr, 0);
	msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));

	/* Build message as chain of buffers */
	skb_queue_head_init(&pkts);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	/* Send message */
	rc = tipc_mcast_xmit(net, &pkts, method, dsts,
			     &tsk->cong_link_cnt);
	if (unlikely(rc))
		return rc;

	/* Update broadcast sequence number */
	tipc_group_update_bc_members(tsk->group);

	return dlen;
}

/**
 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
 * @arrvq: queue with arriving messages, to be cloned after destination lookup
 * @inputq: queue with cloned messages, delivered to socket after dest lookup
 *
 * Multi-threaded: parallel calls with reference to same queues may occur
 */
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
		       struct sk_buff_head *inputq)
{
	u32 scope = TIPC_CLUSTER_SCOPE;
	u32 self = tipc_own_addr(net);
	struct sk_buff *skb, *_skb;
	u32 lower = 0, upper = ~0;
	struct sk_buff_head tmpq;
	u32 portid, oport, onode;
	struct list_head dports;
	struct tipc_msg *msg;
	int hsz;

	__skb_queue_head_init(&tmpq);
	INIT_LIST_HEAD(&dports);

	skb = tipc_skb_peek(arrvq, &inputq->lock);
	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
		msg = buf_msg(skb);
		hsz = skb_headroom(skb) + msg_hdr_sz(msg);
		oport = msg_origport(msg);
		onode = msg_orignode(msg);
		if (onode == self)
			scope = TIPC_NODE_SCOPE;

		/* Create destination port list and message clones: */
		if (!msg_in_group(msg)) {
			lower = msg_namelower(msg);
			upper = msg_nameupper(msg);
		}
		tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper,
					  scope, &dports);
		while (tipc_dest_pop(&dports, NULL, &portid)) {
			_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
			if (_skb) {
				msg_set_destport(buf_msg(_skb), portid);
				__skb_queue_tail(&tmpq, _skb);
				continue;
			}
			pr_warn("Failed to clone mcast rcv buffer\n");
		}
		/* Append to inputq if not already done by other thread */
		spin_lock_bh(&inputq->lock);
		if (skb_peek(arrvq) == skb) {
			skb_queue_splice_tail_init(&tmpq, inputq);
			kfree_skb(__skb_dequeue(arrvq));
		}
		spin_unlock_bh(&inputq->lock);
		__skb_queue_purge(&tmpq);
		kfree_skb(skb);
	}
	tipc_sk_rcv(net, inputq);
}

/**
 * tipc_sk_conn_proto_rcv - receive a connection management protocol message
 * @tsk: receiving socket
 * @skb: pointer to message buffer.
 * @xmitq: queue for outgoing protocol replies (e.g. probe replies)
 */
static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
				   struct sk_buff_head *xmitq)
{
	struct tipc_msg *hdr = buf_msg(skb);
	u32 onode = tsk_own_node(tsk);
	struct sock *sk = &tsk->sk;
	int mtyp = msg_type(hdr);
	bool conn_cong;

	/* Ignore if connection cannot be validated: */
	if (!tsk_peer_msg(tsk, hdr))
		goto exit;

	if (unlikely(msg_errcode(hdr))) {
		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
		tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
				      tsk_peer_port(tsk));
		sk->sk_state_change(sk);
		goto exit;
	}

	tsk->probe_unacked = false;

	if (mtyp == CONN_PROBE) {
		msg_set_type(hdr, CONN_PROBE_REPLY);
		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
			__skb_queue_tail(xmitq, skb);
		return;
	} else if (mtyp == CONN_ACK) {
		conn_cong = tsk_conn_cong(tsk);
		tsk->snt_unacked -= msg_conn_ack(hdr);
		if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
			tsk->snd_win = msg_adv_win(hdr);
		if (conn_cong)
			sk->sk_write_space(sk);
	} else if (mtyp != CONN_PROBE_REPLY) {
		pr_warn("Received unknown CONN_PROTO msg\n");
	}
exit:
	kfree_skb(skb);
}

/**
 * tipc_sendmsg - send message in connectionless manner
 * @sock: socket structure
 * @m: message to send
 * @dsz: amount of user data to be sent
 *
 * Message must have a destination specified explicitly.
 * Used for SOCK_RDM and SOCK_DGRAM messages,
 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
 *
 * Returns the number of bytes sent on success, or errno otherwise
 */
static int tipc_sendmsg(struct socket *sock,
			struct msghdr *m, size_t dsz)
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_sendmsg(sock, m, dsz);
	release_sock(sk);

	return ret;
}

static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct list_head *clinks = &tsk->cong_links;
	bool syn = !tipc_sk_type_connectionless(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq *seq;
	struct sk_buff_head pkts;
	u32 type, inst, domain;
	u32 dnode, dport;
	int mtu, rc;

	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
		return -EMSGSIZE;

	if (unlikely(grp))
		return tipc_send_group_bcast(sock, m, dlen, timeout);

	if (unlikely(!dest)) {
		dest = &tsk->peer;
		if (!syn || dest->family != AF_TIPC)
			return -EDESTADDRREQ;
	}

	if (unlikely(m->msg_namelen < sizeof(*dest)))
		return -EINVAL;

	if (unlikely(dest->family != AF_TIPC))
		return -EINVAL;

	if (unlikely(syn)) {
		if (sk->sk_state == TIPC_LISTEN)
			return -EPIPE;
		if (sk->sk_state != TIPC_OPEN)
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
		if (dest->addrtype == TIPC_ADDR_NAME) {
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
		}
	}

	seq = &dest->addr.nameseq;
	if (dest->addrtype == TIPC_ADDR_MCAST)
		return tipc_sendmcast(sock, seq, m, dlen, timeout);

	if (dest->addrtype == TIPC_ADDR_NAME) {
		type = dest->addr.name.name.type;
		inst = dest->addr.name.name.instance;
		domain = dest->addr.name.domain;
		dnode = domain;
		msg_set_type(hdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
		msg_set_nametype(hdr, type);
		msg_set_nameinst(hdr, inst);
		msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
		dport = tipc_nametbl_translate(net, type, inst, &dnode);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dport);
		if (unlikely(!dport && !dnode))
			return -EHOSTUNREACH;

	} else if (dest->addrtype == TIPC_ADDR_ID) {
		dnode = dest->addr.id.node;
		msg_set_type(hdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(hdr, 0);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dest->addr.id.ref);
		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
	}

	/* Block or return if destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout,
				!tipc_dest_find(clinks, dnode, 0));
	if (unlikely(rc))
		return rc;

	skb_queue_head_init(&pkts);
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
		tipc_dest_push(clinks, dnode, 0);
		tsk->cong_link_cnt++;
		rc = 0;
	}

	if (unlikely(syn && !rc))
		tipc_set_sk_state(sk, TIPC_CONNECTING);

	return rc ? rc : dlen;
}
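/* Illustrative user-space sketch (not part of the original file, assuming the
 * usual <linux/tipc.h> address definitions): a connectionless send to a named
 * destination ends up in __tipc_sendmsg() with TIPC_ADDR_NAME, where the name
 * is translated to a port/node pair by tipc_nametbl_translate():
 *
 *	struct sockaddr_tipc dst = {0};
 *	dst.family = AF_TIPC;
 *	dst.addrtype = TIPC_ADDR_NAME;
 *	dst.addr.name.name.type = 1000;
 *	dst.addr.name.name.instance = 17;
 *	dst.addr.name.domain = 0;
 *	sendto(sd, buf, len, 0, (struct sockaddr *)&dst, sizeof(dst));
 */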

/**
 * tipc_sendstream - send stream-oriented data
 * @sock: socket structure
 * @m: data to send
 * @dsz: total length of data to be transmitted
 *
 * Used for SOCK_STREAM data.
 *
 * Returns the number of bytes sent on success (or partial success),
 * or errno if no data sent
 */
static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
{
	struct sock *sk = sock->sk;
	int ret;

	lock_sock(sk);
	ret = __tipc_sendstream(sock, m, dsz);
	release_sock(sk);

	return ret;
}

static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	struct sk_buff_head pkts;
	u32 dnode = tsk_peer_node(tsk);
	int send, sent = 0;
	int rc = 0;

	skb_queue_head_init(&pkts);

	if (unlikely(dlen > INT_MAX))
		return -EMSGSIZE;

	/* Handle implicit connection setup */
	if (unlikely(dest)) {
		rc = __tipc_sendmsg(sock, m, dlen);
		if (dlen && (dlen == rc))
			tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
		return rc;
	}

	do {
		rc = tipc_wait_for_cond(sock, &timeout,
					(!tsk->cong_link_cnt &&
					 !tsk_conn_cong(tsk) &&
					 tipc_sk_connected(sk)));
		if (unlikely(rc))
			break;

		send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
		rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
		if (unlikely(rc != send))
			break;

		rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
		if (unlikely(rc == -ELINKCONG)) {
			tsk->cong_link_cnt = 1;
			rc = 0;
		}
		if (likely(!rc)) {
			tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
			sent += send;
		}
	} while (sent < dlen && !rc);

	return sent ? sent : rc;
}

/**
 * tipc_send_packet - send a connection-oriented message
 * @sock: socket structure
 * @m: message to send
 * @dsz: length of data to be transmitted
 *
 * Used for SOCK_SEQPACKET messages.
 *
 * Returns the number of bytes sent on success, or errno otherwise
 */
static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
{
	if (dsz > TIPC_MAX_USER_MSG_SIZE)
		return -EMSGSIZE;

	return tipc_sendstream(sock, m, dsz);
}

/* tipc_sk_finish_conn - complete the setup of a connection
 */
static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
				u32 peer_node)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *msg = &tsk->phdr;

	msg_set_destnode(msg, peer_node);
	msg_set_destport(msg, peer_port);
	msg_set_type(msg, TIPC_CONN_MSG);
	msg_set_lookup_scope(msg, 0);
	msg_set_hdr_sz(msg, SHORT_H_SIZE);

	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
	tipc_set_sk_state(sk, TIPC_ESTABLISHED);
	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
	tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
		return;

	/* Fall back to message based flow control */
	tsk->rcv_win = FLOWCTL_MSG_WIN;
	tsk->snd_win = FLOWCTL_MSG_WIN;
}

/**
 * set_orig_addr - capture sender's address for received message
 * @m: descriptor for message info
 * @msg: received message header
 *
 * Note: Address is not captured if not requested by receiver.
 */
static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
{
	DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);

	if (addr) {
		addr->family = AF_TIPC;
		addr->addrtype = TIPC_ADDR_ID;
		memset(&addr->addr, 0, sizeof(addr->addr));
		addr->addr.id.ref = msg_origport(msg);
		addr->addr.id.node = msg_orignode(msg);
		addr->addr.name.domain = 0;	/* could leave uninitialized */
		addr->scope = 0;		/* could leave uninitialized */
		m->msg_namelen = sizeof(struct sockaddr_tipc);
	}
}

/**
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
 * @m: descriptor for message info
 * @msg: received message header
 * @tsk: TIPC port associated with message
 *
 * Note: Ancillary data is not captured if not requested by receiver.
 *
 * Returns 0 if successful, otherwise errno
 */
static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
				 struct tipc_sock *tsk)
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
	int has_name;
	int res;

	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
			return res;
		if (anc_data[1]) {
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
		break;
	default:
		has_name = 0;
	}
	if (has_name) {
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}

	return 0;
}

static void tipc_sk_send_ack(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct sk_buff *skb = NULL;
	struct tipc_msg *msg;
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);

	if (!tipc_sk_connected(sk))
		return;
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
	if (!skb)
		return;
	msg = buf_msg(skb);
	msg_set_conn_ack(msg, tsk->rcv_unacked);
	tsk->rcv_unacked = 0;

	/* Adjust to and advertise the correct window limit */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
		tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
		msg_set_adv_win(msg, tsk->rcv_win);
	}
	tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
}
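/* Illustrative note (not part of the original file): with block-based flow
 * control the CONN_ACK above also re-advertises the receive window.
 * Assuming FLOWCTL_BLK_SZ is 1024 and sk_rcvbuf is 2 MB, tsk_adv_blocks()
 * yields 2097152 / 1024 / 4 = 512 advertised blocks, and the receive paths
 * below trigger an ACK once rcv_unacked reaches rcv_win / TIPC_ACK_RATE,
 * i.e. one quarter of that window.
 */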

static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	long timeo = *timeop;
	int err = sock_error(sk);

	if (err)
		return err;

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			if (sk->sk_shutdown & RCV_SHUTDOWN) {
				err = -ENOTCONN;
				break;
			}
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;

		err = sock_error(sk);
		if (err)
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	*timeop = timeo;
	return err;
}

/**
 * tipc_recvmsg - receive packet-oriented message
 * @m: descriptor for message info
 * @buflen: length of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
			size_t buflen,	int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *skb;
	struct tipc_msg *hdr;
	bool connected = !tipc_sk_type_connectionless(sk);
	int rc, err, hlen, dlen, copy;
	long timeout;

	/* Catch invalid receive requests */
	if (unlikely(!buflen))
		return -EINVAL;

	lock_sock(sk);
	if (unlikely(connected && sk->sk_state == TIPC_OPEN)) {
		rc = -ENOTCONN;
		goto exit;
	}
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Look at first msg in receive queue; wait if necessary */
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			goto exit;
		skb = skb_peek(&sk->sk_receive_queue);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);
		if (likely(dlen || err))
			break;
		tsk_advance_rx_queue(sk);
	} while (1);

	/* Collect msg meta data, including error code and rejected data */
	set_orig_addr(m, hdr);
	rc = tipc_sk_anc_data_recv(m, hdr, tsk);
	if (unlikely(rc))
		goto exit;

	/* Capture data if non-error msg, otherwise just set return value */
	if (likely(!err)) {
		copy = min_t(int, dlen, buflen);
		if (unlikely(copy != dlen))
			m->msg_flags |= MSG_TRUNC;
		rc = skb_copy_datagram_msg(skb, hlen, m, copy);
	} else {
		copy = 0;
		rc = 0;
		if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
			rc = -ECONNRESET;
	}
	if (unlikely(rc))
		goto exit;

	/* Capture of data or error code/rejected data was successful */
	if (unlikely(flags & MSG_PEEK))
		goto exit;

	tsk_advance_rx_queue(sk);
	if (likely(!connected))
		goto exit;

	/* Send connection flow control ack when applicable */
	tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
	if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
		tipc_sk_send_ack(tsk);
exit:
	release_sock(sk);
	return rc ? rc : copy;
}

/**
 * tipc_recvstream - receive stream-oriented data
 * @m: descriptor for message info
 * @buflen: total size of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recvstream(struct socket *sock, struct msghdr *m,
			   size_t buflen, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *skb;
	struct tipc_msg *hdr;
	struct tipc_skb_cb *skb_cb;
	bool peek = flags & MSG_PEEK;
	int offset, required, copy, copied = 0;
	int hlen, dlen, err, rc;
	long timeout;

	/* Catch invalid receive attempts */
	if (unlikely(!buflen))
		return -EINVAL;

	lock_sock(sk);

	if (unlikely(sk->sk_state == TIPC_OPEN)) {
		rc = -ENOTCONN;
		goto exit;
	}
	required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen);
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Look at first msg in receive queue; wait if necessary */
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			break;
		skb = skb_peek(&sk->sk_receive_queue);
		skb_cb = TIPC_SKB_CB(skb);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);

		/* Discard any empty non-errored (SYN-) message */
		if (unlikely(!dlen && !err)) {
			tsk_advance_rx_queue(sk);
			continue;
		}

		/* Collect msg meta data, incl. error code and rejected data */
		if (!copied) {
			set_orig_addr(m, hdr);
			rc = tipc_sk_anc_data_recv(m, hdr, tsk);
			if (rc)
				break;
		}

		/* Copy data if msg ok, otherwise return error/partial data */
		if (likely(!err)) {
			offset = skb_cb->bytes_read;
			copy = min_t(int, dlen - offset, buflen - copied);
			rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
			if (unlikely(rc))
				break;
			copied += copy;
			offset += copy;
			if (unlikely(offset < dlen)) {
				if (!peek)
					skb_cb->bytes_read = offset;
				break;
			}
		} else {
			rc = 0;
			if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control)
				rc = -ECONNRESET;
			if (copied || rc)
				break;
		}

		if (unlikely(peek))
			break;

		tsk_advance_rx_queue(sk);

		/* Send connection flow control advertisement when applicable */
		tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
		if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE))
			tipc_sk_send_ack(tsk);

		/* Exit if all requested data or FIN/error received */
		if (copied == buflen || err)
			break;

	} while (!skb_queue_empty(&sk->sk_receive_queue) || copied < required);
exit:
	release_sock(sk);
	return copied ? copied : rc;
}

/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 */
static void tipc_data_ready(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
						POLLRDNORM | POLLRDBAND);
	rcu_read_unlock();
}

static void tipc_sock_destruct(struct sock *sk)
{
	__skb_queue_purge(&sk->sk_receive_queue);
}

static void tipc_sk_proto_rcv(struct sock *sk,
			      struct sk_buff_head *inputq,
			      struct sk_buff_head *xmitq)
{
	struct sk_buff *skb = __skb_dequeue(inputq);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);
	struct tipc_group *grp = tsk->group;

	switch (msg_user(hdr)) {
	case CONN_MANAGER:
		tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
		return;
	case SOCK_WAKEUP:
		tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
		tsk->cong_link_cnt--;
		sk->sk_write_space(sk);
		break;
	case GROUP_PROTOCOL:
		tipc_group_proto_rcv(grp, hdr, xmitq);
		break;
	case TOP_SRV:
		tipc_group_member_evt(tsk->group, skb, xmitq);
		skb = NULL;
		break;
	default:
		break;
	}

	kfree_skb(skb);
}

/**
 * tipc_sk_filter_connect - Handle incoming message for a connection-based socket
 * @tsk: TIPC socket
 * @skb: pointer to message buffer. Set to NULL if buffer is consumed
 *
 * Returns true if everything ok, false otherwise
 */
static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *hdr = buf_msg(skb);
	u32 pport = msg_origport(hdr);
	u32 pnode = msg_orignode(hdr);

	if (unlikely(msg_mcast(hdr)))
		return false;

	switch (sk->sk_state) {
	case TIPC_CONNECTING:
		/* Accept only ACK or NACK message */
		if (unlikely(!msg_connected(hdr))) {
			if (pport != tsk_peer_port(tsk) ||
			    pnode != tsk_peer_node(tsk))
				return false;

			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = ECONNREFUSED;
			sk->sk_state_change(sk);
			return true;
		}

		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = ECONNREFUSED;
			sk->sk_state_change(sk);
			return true;
		}

		if (unlikely(!msg_isdata(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = EINVAL;
			sk->sk_state_change(sk);
			return true;
		}

		tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
		msg_set_importance(&tsk->phdr, msg_importance(hdr));

		/* If 'ACK+' message, add to socket receive queue */
		if (msg_data_sz(hdr))
			return true;

		/* If empty 'ACK-' message, wake up sleeping connect() */
		sk->sk_data_ready(sk);

		/* 'ACK-' message is neither accepted nor rejected: */
		msg_set_dest_droppable(hdr, 1);
		return false;

	case TIPC_OPEN:
	case TIPC_DISCONNECTING:
		break;
	case TIPC_LISTEN:
		/* Accept only SYN message */
		if (!msg_connected(hdr) && !(msg_errcode(hdr)))
			return true;
		break;
	case TIPC_ESTABLISHED:
		/* Accept only connection-based messages sent by peer */
		if (unlikely(!tsk_peer_msg(tsk, hdr)))
			return false;

		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			/* Let timer expire on its own */
			tipc_node_remove_conn(net, tsk_peer_node(tsk),
					      tsk->portid);
			sk->sk_state_change(sk);
		}
		return true;
	default:
		pr_err("Unknown sk_state %u\n", sk->sk_state);
	}

	return false;
}

/**
 * rcvbuf_limit - get proper overload limit of socket receive queue
 * @sk: socket
 * @skb: message
 *
 * For connection oriented messages, irrespective of importance,
 * default queue limit is 2 MB.
 *
 * For connectionless messages, queue limits are based on message
 * importance as follows:
 *
 * TIPC_LOW_IMPORTANCE       (2 MB)
 * TIPC_MEDIUM_IMPORTANCE    (4 MB)
 * TIPC_HIGH_IMPORTANCE      (8 MB)
 * TIPC_CRITICAL_IMPORTANCE  (16 MB)
 *
 * Returns overload limit according to corresponding message importance
 */
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
{
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);

	if (unlikely(!msg_connected(hdr)))
		return sk->sk_rcvbuf << msg_importance(hdr);

	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return sk->sk_rcvbuf;

	return FLOWCTL_MSG_LIM;
}

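/* Worked example (illustrative only, assuming the ~2 MB default sk_rcvbuf
 * described above): a connectionless message of TIPC_CRITICAL_IMPORTANCE
 * (importance value 3) gets an rcvbuf_limit() of sk_rcvbuf << 3, i.e.
 * roughly 16 MB, matching the table in the comment above.
 */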
/**
 * tipc_sk_filter_rcv - validate incoming message
 * @sk: socket
 * @skb: pointer to message.
 *
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
 * Called with socket lock already taken
 *
 */
static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
			       struct sk_buff_head *xmitq)
{
	bool sk_conn = !tipc_sk_type_connectionless(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = buf_msg(skb);
	struct net *net = sock_net(sk);
	struct sk_buff_head inputq;
	int limit, err = TIPC_OK;

	TIPC_SKB_CB(skb)->bytes_read = 0;
	__skb_queue_head_init(&inputq);
	__skb_queue_tail(&inputq, skb);

	if (unlikely(!msg_isdata(hdr)))
		tipc_sk_proto_rcv(sk, &inputq, xmitq);
	else if (unlikely(msg_type(hdr) > TIPC_GRP_BCAST_MSG))
		return kfree_skb(skb);

	if (unlikely(grp))
		tipc_group_filter_msg(grp, &inputq, xmitq);

	/* Validate and add to receive buffer if there is space */
	while ((skb = __skb_dequeue(&inputq))) {
		hdr = buf_msg(skb);
		limit = rcvbuf_limit(sk, skb);
		if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
		    (!sk_conn && msg_connected(hdr)) ||
		    (!grp && msg_in_group(hdr)))
			err = TIPC_ERR_NO_PORT;
		else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
			err = TIPC_ERR_OVERLOAD;

		if (unlikely(err)) {
			tipc_skb_reject(net, err, skb, xmitq);
			err = TIPC_OK;
			continue;
		}
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		skb_set_owner_r(skb, sk);
		sk->sk_data_ready(sk);
	}
}

/**
 * tipc_sk_backlog_rcv - handle incoming message from backlog queue
 * @sk: socket
 * @skb: message
 *
 * Caller must hold socket lock
 */
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	unsigned int before = sk_rmem_alloc_get(sk);
	struct sk_buff_head xmitq;
	unsigned int added;

	__skb_queue_head_init(&xmitq);

	tipc_sk_filter_rcv(sk, skb, &xmitq);
	added = sk_rmem_alloc_get(sk) - before;
	atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt);

	/* Send pending response/rejected messages, if any */
	tipc_node_distr_xmit(sock_net(sk), &xmitq);
	return 0;
}

/**
 * tipc_sk_enqueue - extract all buffers with destination 'dport' from
 *                   inputq and try adding them to socket or backlog queue
 * @inputq: list of incoming buffers with potentially different destinations
 * @sk: socket where the buffers should be enqueued
 * @dport: port number for the socket
 *
 * Caller must hold socket lock
 */
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
			    u32 dport, struct sk_buff_head *xmitq)
{
	unsigned long time_limit = jiffies + 2;
	struct sk_buff *skb;
	unsigned int lim;
	atomic_t *dcnt;
	u32 onode;

	/* Bound the work done per call; tipc_sk_rcv() resubmits what is left */
	while (skb_queue_len(inputq)) {
		if (unlikely(time_after_eq(jiffies, time_limit)))
			return;

		skb = tipc_skb_dequeue(inputq, dport);
		if (unlikely(!skb))
			return;

		/* Add message directly to receive queue if possible */
		if (!sock_owned_by_user(sk)) {
			tipc_sk_filter_rcv(sk, skb, xmitq);
			continue;
		}

		/* Try backlog, compensating for double-counted bytes */
		dcnt = &tipc_sk(sk)->dupl_rcvcnt;
		if (!sk->sk_backlog.len)
			atomic_set(dcnt, 0);
		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
		if (likely(!sk_add_backlog(sk, skb, lim)))
			continue;

		/* Overload => reject message back to sender */
		onode = tipc_own_addr(sock_net(sk));
		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
			__skb_queue_tail(xmitq, skb);
		break;
	}
}

/**
 * tipc_sk_rcv - handle a chain of incoming buffers
 * @net: network namespace
 * @inputq: buffer list containing the buffers
 * Consumes all buffers in list until inputq is empty
 * Note: may be called in multiple threads referring to the same queue
 */
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
{
	struct sk_buff_head xmitq;
	u32 dnode, dport = 0;
	int err;
	struct tipc_sock *tsk;
	struct sock *sk;
	struct sk_buff *skb;

	__skb_queue_head_init(&xmitq);
	while (skb_queue_len(inputq)) {
		dport = tipc_skb_peek_port(inputq, dport);
		tsk = tipc_sk_lookup(net, dport);

		if (likely(tsk)) {
			sk = &tsk->sk;
			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
				spin_unlock_bh(&sk->sk_lock.slock);
			}
			/* Send pending response/rejected messages, if any */
			tipc_node_distr_xmit(sock_net(sk), &xmitq);
			sock_put(sk);
			continue;
		}
		/* No destination socket => dequeue skb if still there */
		skb = tipc_skb_dequeue(inputq, dport);
		if (!skb)
			return;

		/* Try secondary lookup if unresolved named message */
		err = TIPC_ERR_NO_PORT;
		if (tipc_msg_lookup_dest(net, skb, &err))
			goto xmit;

		/* Prepare for message rejection */
		if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
			continue;
xmit:
		dnode = msg_destnode(buf_msg(skb));
		tipc_node_xmit_skb(net, skb, dnode, dport);
	}
}

static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = sock->sk;
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (!*timeo_p)
			return -ETIMEDOUT;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		add_wait_queue(sk_sleep(sk), &wait);
		done = sk_wait_event(sk, timeo_p,
				     sk->sk_state != TIPC_CONNECTING, &wait);
		remove_wait_queue(sk_sleep(sk), &wait);
	} while (!done);
	return 0;
}

/**
 * tipc_connect - establish a connection to another TIPC port
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
 * @flags: file-related flags associated with socket
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
			int destlen, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
	long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
	int previous;
	int res = 0;

	if (destlen != sizeof(struct sockaddr_tipc))
		return -EINVAL;

	lock_sock(sk);

	if (tsk->group) {
		res = -EINVAL;
		goto exit;
	}

	if (dst->family == AF_UNSPEC) {
		memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
		if (!tipc_sk_type_connectionless(sk))
			res = -EINVAL;
		goto exit;
	} else if (dst->family != AF_TIPC) {
		res = -EINVAL;
	}
	if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
		res = -EINVAL;
	if (res)
		goto exit;

	/* DGRAM/RDM connect(), just save the destaddr */
	if (tipc_sk_type_connectionless(sk)) {
		memcpy(&tsk->peer, dest, destlen);
		goto exit;
	}

	previous = sk->sk_state;

	switch (sk->sk_state) {
	case TIPC_OPEN:
		/* Send a 'SYN-' to destination */
		m.msg_name = dest;
		m.msg_namelen = destlen;

		/* If connect is in non-blocking case, set MSG_DONTWAIT to
		 * indicate send_msg() is never blocked.
		 */
		if (!timeout)
			m.msg_flags = MSG_DONTWAIT;

		res = __tipc_sendmsg(sock, &m, 0);
		if ((res < 0) && (res != -EWOULDBLOCK))
			goto exit;

		/* Just entered TIPC_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		res = -EINPROGRESS;
		/* fall thru' */
	case TIPC_CONNECTING:
		if (!timeout) {
			if (previous == TIPC_CONNECTING)
				res = -EALREADY;
			goto exit;
		}
		timeout = msecs_to_jiffies(timeout);
		/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
		res = tipc_wait_for_connect(sock, &timeout);
		break;
	case TIPC_ESTABLISHED:
		res = -EISCONN;
		break;
	default:
		res = -EINVAL;
	}

exit:
	release_sock(sk);
	return res;
}

/**
 * tipc_listen - allow socket to listen for incoming connections
 * @sock: socket structure
 * @len: (unused)
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_listen(struct socket *sock, int len)
{
	struct sock *sk = sock->sk;
	int res;

	lock_sock(sk);
	res = tipc_set_sk_state(sk, TIPC_LISTEN);
	release_sock(sk);

	return res;
}

static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int err;

	/* True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	*/
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

/**
 * tipc_accept - wait for connection request
 * @sock: listening socket
 * @new_sock: new socket that is to be connected
 * @flags: file-related flags associated with socket
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
		       bool kern)
{
	struct sock *new_sk, *sk = sock->sk;
	struct sk_buff *buf;
	struct tipc_sock *new_tsock;
	struct tipc_msg *msg;
	long timeo;
	int res;

	lock_sock(sk);

	if (sk->sk_state != TIPC_LISTEN) {
		res = -EINVAL;
		goto exit;
	}
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	res = tipc_wait_for_accept(sock, timeo);
	if (res)
		goto exit;

	buf = skb_peek(&sk->sk_receive_queue);

	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
	if (res)
		goto exit;
	security_sk_clone(sock->sk, new_sock->sk);

	new_sk = new_sock->sk;
	new_tsock = tipc_sk(new_sk);
	msg = buf_msg(buf);

	/* we lock on new_sk; but lockdep sees the lock on sk */
	lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);

	/*
	 * Reject any stray messages received by new socket
	 * before the socket lock was taken (very, very unlikely)
	 */
	tsk_rej_rx_queue(new_sk);

	/* Connect new socket to its peer */
	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));

	tsk_set_importance(new_tsock, msg_importance(msg));
	if (msg_named(msg)) {
		new_tsock->conn_type = msg_nametype(msg);
		new_tsock->conn_instance = msg_nameinst(msg);
	}

	/*
	 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
	 * Respond to 'SYN+' by queuing it on new socket.
	 */
	if (!msg_data_sz(msg)) {
		struct msghdr m = {NULL,};

		tsk_advance_rx_queue(sk);
		__tipc_sendstream(new_sock, &m, 0);
	} else {
		__skb_dequeue(&sk->sk_receive_queue);
		__skb_queue_head(&new_sk->sk_receive_queue, buf);
		skb_set_owner_r(buf, new_sk);
	}
	release_sock(new_sk);
exit:
	release_sock(sk);
	return res;
}

/**
 * tipc_shutdown - shutdown socket connection
 * @sock: socket structure
 * @how: direction to close (must be SHUT_RDWR)
 *
 * Terminates connection (if necessary), then purges socket's receive queue.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	int res;

	if (how != SHUT_RDWR)
		return -EINVAL;

	lock_sock(sk);

	__tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
	sk->sk_shutdown = SEND_SHUTDOWN;

	if (sk->sk_state == TIPC_DISCONNECTING) {
		/* Discard any unreceived messages */
		__skb_queue_purge(&sk->sk_receive_queue);

		/* Wake up anyone sleeping in poll */
		sk->sk_state_change(sk);
		res = 0;
	} else {
		res = -ENOTCONN;
	}

	release_sock(sk);
	return res;
}

static void tipc_sk_timeout(unsigned long data)
{
	struct tipc_sock *tsk = (struct tipc_sock *)data;
	struct sock *sk = &tsk->sk;
	struct sk_buff *skb = NULL;
	u32 peer_port, peer_node;
	u32 own_node = tsk_own_node(tsk);

	bh_lock_sock(sk);
	if (!tipc_sk_connected(sk)) {
		bh_unlock_sock(sk);
		goto exit;
	}
	peer_port = tsk_peer_port(tsk);
	peer_node = tsk_peer_node(tsk);

	if (tsk->probe_unacked) {
		if (!sock_owned_by_user(sk)) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
					      tsk_peer_port(tsk));
			sk->sk_state_change(sk);
		} else {
			/* Try again later */
			sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
		}

		bh_unlock_sock(sk);
		goto exit;
	}

	skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
			      INT_H_SIZE, 0, peer_node, own_node,
			      peer_port, tsk->portid, TIPC_OK);
	tsk->probe_unacked = true;
	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
	bh_unlock_sock(sk);
	if (skb)
		tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
exit:
	sock_put(sk);
}

static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
			   struct tipc_name_seq const *seq)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct publication *publ;
	u32 key;

	if (tipc_sk_connected(sk))
		return -EINVAL;
	key = tsk->portid + tsk->pub_count + 1;
	if (key == tsk->portid)
		return -EADDRINUSE;

	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
				    scope, tsk->portid, key);
	if (unlikely(!publ))
		return -EINVAL;

	list_add(&publ->pport_list, &tsk->publications);
	tsk->pub_count++;
	tsk->published = 1;
	return 0;
}

static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
			    struct tipc_name_seq const *seq)
{
	struct net *net = sock_net(&tsk->sk);
	struct publication *publ;
	struct publication *safe;
	int rc = -EINVAL;

	list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
		if (seq) {
			if (publ->scope != scope)
				continue;
			if (publ->type != seq->type)
				continue;
			if (publ->lower != seq->lower)
				continue;
			if (publ->upper != seq->upper)
				break;
			tipc_nametbl_withdraw(net, publ->type, publ->lower,
					      publ->ref, publ->key);
			rc = 0;
			break;
		}
		tipc_nametbl_withdraw(net, publ->type, publ->lower,
				      publ->ref, publ->key);
		rc = 0;
	}
	if (list_empty(&tsk->publications))
		tsk->published = 0;
	return rc;
}

/* tipc_sk_reinit: set non-zero address in all existing sockets
 *                 when we go from standalone to network mode.
 */
void tipc_sk_reinit(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct rhashtable_iter iter;
	struct tipc_sock *tsk;
	struct tipc_msg *msg;

	rhashtable_walk_enter(&tn->sk_rht, &iter);

	do {
		tsk = ERR_PTR(rhashtable_walk_start(&iter));
		if (IS_ERR(tsk))
			goto walk_stop;

		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			msg = &tsk->phdr;
			msg_set_prevnode(msg, tn->own_addr);
			msg_set_orignode(msg, tn->own_addr);
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
walk_stop:
		rhashtable_walk_stop(&iter);
	} while (tsk == ERR_PTR(-EAGAIN));
}

static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct tipc_sock *tsk;

	rcu_read_lock();
	tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params);
	if (tsk)
		sock_hold(&tsk->sk);
	rcu_read_unlock();

	return tsk;
}

static int tipc_sk_insert(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1;
	u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT;

	/* Scan the whole port range once, starting at a random offset */
	while (remaining--) {
		portid++;
		if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT))
			portid = TIPC_MIN_PORT;
		tsk->portid = portid;
		sock_hold(&tsk->sk);
		if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node,
						   tsk_rht_params))
			return 0;
		sock_put(&tsk->sk);
	}

	return -1;
}

static void tipc_sk_remove(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);

	if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
		__sock_put(sk);
	}
}

static const struct rhashtable_params tsk_rht_params = {
	.nelem_hint = 192,
	.head_offset = offsetof(struct tipc_sock, node),
	.key_offset = offsetof(struct tipc_sock, portid),
	.key_len = sizeof(u32), /* portid */
	.max_size = 1048576,
	.min_size = 256,
	.automatic_shrinking = true,
};

int tipc_sk_rht_init(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	return rhashtable_init(&tn->sk_rht, &tsk_rht_params);
}

void tipc_sk_rht_destroy(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	/* Wait for socket readers to complete */
	synchronize_net();

	rhashtable_destroy(&tn->sk_rht);
}

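/* A group membership is requested from user space via setsockopt() and ends
 * up in tipc_sk_join() below. A minimal, illustrative sketch of such a call
 * (the type/instance values are made-up examples, not taken from this file):
 *
 *	struct tipc_group_req req = {
 *		.type = 4711,
 *		.instance = 1,
 *		.scope = TIPC_CLUSTER_SCOPE,
 *	};
 *	setsockopt(sd, SOL_TIPC, TIPC_GROUP_JOIN, &req, sizeof(req));
 */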
static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
{
	struct net *net = sock_net(&tsk->sk);
	u32 domain = addr_domain(net, mreq->scope);
	struct tipc_group *grp = tsk->group;
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq seq;
	int rc;

	if (mreq->type < TIPC_RESERVED_TYPES)
		return -EACCES;
	if (grp)
		return -EACCES;
	grp = tipc_group_create(net, tsk->portid, mreq);
	if (!grp)
		return -ENOMEM;
	tsk->group = grp;
	msg_set_lookup_scope(hdr, mreq->scope);
	msg_set_nametype(hdr, mreq->type);
	msg_set_dest_droppable(hdr, true);
	seq.type = mreq->type;
	seq.lower = mreq->instance;
	seq.upper = seq.lower;
	tipc_nametbl_build_group(net, grp, mreq->type, domain);
	rc = tipc_sk_publish(tsk, mreq->scope, &seq);
	if (rc)
		tipc_group_delete(net, grp);
	return rc;
}

static int tipc_sk_leave(struct tipc_sock *tsk)
{
	struct net *net = sock_net(&tsk->sk);
	struct tipc_group *grp = tsk->group;
	struct tipc_name_seq seq;
	int scope;

	if (!grp)
		return -EINVAL;
	tipc_group_self(grp, &seq, &scope);
	tipc_group_delete(net, grp);
	tsk->group = NULL;
	tipc_sk_withdraw(tsk, scope, &seq);
	return 0;
}

/**
 * tipc_setsockopt - set socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: pointer to new option value
 * @ol: length of option value
 *
 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, unsigned int ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_group_req mreq;
	u32 value = 0;
	int res = 0;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return 0;
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;

	switch (opt) {
	case TIPC_IMPORTANCE:
	case TIPC_SRC_DROPPABLE:
	case TIPC_DEST_DROPPABLE:
	case TIPC_CONN_TIMEOUT:
		if (ol < sizeof(value))
			return -EINVAL;
		if (get_user(value, (u32 __user *)ov))
			return -EFAULT;
		break;
	case TIPC_GROUP_JOIN:
		if (ol < sizeof(mreq))
			return -EINVAL;
		if (copy_from_user(&mreq, ov, sizeof(mreq)))
			return -EFAULT;
		break;
	default:
		if (ov || ol)
			return -EINVAL;
	}

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		res = tsk_set_importance(tsk, value);
		break;
	case TIPC_SRC_DROPPABLE:
		if (sock->type != SOCK_STREAM)
			tsk_set_unreliable(tsk, value);
		else
			res = -ENOPROTOOPT;
		break;
	case TIPC_DEST_DROPPABLE:
		tsk_set_unreturnable(tsk, value);
		break;
	case TIPC_CONN_TIMEOUT:
		tipc_sk(sk)->conn_timeout = value;
		break;
	case TIPC_MCAST_BROADCAST:
		tsk->mc_method.rcast = false;
		tsk->mc_method.mandatory = true;
		break;
	case TIPC_MCAST_REPLICAST:
		tsk->mc_method.rcast = true;
		tsk->mc_method.mandatory = true;
		break;
	case TIPC_GROUP_JOIN:
		res = tipc_sk_join(tsk, &mreq);
		break;
	case TIPC_GROUP_LEAVE:
		res = tipc_sk_leave(tsk);
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	return res;
}
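/* Illustrative only: the simple u32 options handled above are set from user
 * space like this (the descriptor and value are example assumptions):
 *
 *	__u32 timeout_ms = 10000;
 *	setsockopt(sd, SOL_TIPC, TIPC_CONN_TIMEOUT, &timeout_ms,
 *		   sizeof(timeout_ms));
 */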

/**
 * tipc_getsockopt - get socket option
 * @sock: socket structure
 * @lvl: option level
 * @opt: option identifier
 * @ov: receptacle for option value
 * @ol: receptacle for length of option value
 *
 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
 * (to ease compatibility).
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
			   char __user *ov, int __user *ol)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_name_seq seq;
	int len, scope;
	u32 value;
	int res;

	if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
		return put_user(0, ol);
	if (lvl != SOL_TIPC)
		return -ENOPROTOOPT;
	res = get_user(len, ol);
	if (res)
		return res;

	lock_sock(sk);

	switch (opt) {
	case TIPC_IMPORTANCE:
		value = tsk_importance(tsk);
		break;
	case TIPC_SRC_DROPPABLE:
		value = tsk_unreliable(tsk);
		break;
	case TIPC_DEST_DROPPABLE:
		value = tsk_unreturnable(tsk);
		break;
	case TIPC_CONN_TIMEOUT:
		value = tsk->conn_timeout;
		/* no need to set "res", since already 0 at this point */
		break;
	case TIPC_NODE_RECVQ_DEPTH:
		value = 0; /* was tipc_queue_size, now obsolete */
		break;
	case TIPC_SOCK_RECVQ_DEPTH:
		value = skb_queue_len(&sk->sk_receive_queue);
		break;
	case TIPC_GROUP_JOIN:
		seq.type = 0;
		if (tsk->group)
			tipc_group_self(tsk->group, &seq, &scope);
		value = seq.type;
		break;
	default:
		res = -EINVAL;
	}

	release_sock(sk);

	if (res)
		return res;	/* "get" failed */

	if (len < sizeof(value))
		return -EINVAL;

	if (copy_to_user(ov, &value, sizeof(value)))
		return -EFAULT;

	return put_user(sizeof(value), ol);
}

static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	struct tipc_sioc_ln_req lnr;
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case SIOCGETLINKNAME:
		if (copy_from_user(&lnr, argp, sizeof(lnr)))
			return -EFAULT;
		if (!tipc_node_get_linkname(sock_net(sk),
					    lnr.bearer_id & 0xffff, lnr.peer,
					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
			if (copy_to_user(argp, &lnr, sizeof(lnr)))
				return -EFAULT;
			return 0;
		}
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

static int tipc_socketpair(struct socket *sock1, struct socket *sock2)
{
	struct tipc_sock *tsk2 = tipc_sk(sock2->sk);
	struct tipc_sock *tsk1 = tipc_sk(sock1->sk);
	u32 onode = tipc_own_addr(sock_net(sock1->sk));

	tsk1->peer.family = AF_TIPC;
	tsk1->peer.addrtype = TIPC_ADDR_ID;
	tsk1->peer.scope = TIPC_NODE_SCOPE;
	tsk1->peer.addr.id.ref = tsk2->portid;
	tsk1->peer.addr.id.node = onode;
	tsk2->peer.family = AF_TIPC;
	tsk2->peer.addrtype = TIPC_ADDR_ID;
	tsk2->peer.scope = TIPC_NODE_SCOPE;
	tsk2->peer.addr.id.ref = tsk1->portid;
	tsk2->peer.addr.id.node = onode;

	tipc_sk_finish_conn(tsk1, tsk2->portid, onode);
	tipc_sk_finish_conn(tsk2, tsk1->portid, onode);
	return 0;
}
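/* Illustrative only: tipc_socketpair() above backs the ordinary
 * socketpair(2) call for AF_TIPC sockets, e.g. (error handling omitted):
 *
 *	int sv[2];
 *	socketpair(AF_TIPC, SOCK_SEQPACKET, 0, sv);
 */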

/* Protocol switches for the various types of TIPC sockets */

static const struct proto_ops msg_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= tipc_socketpair,
	.accept		= sock_no_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendmsg,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

static const struct proto_ops packet_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= tipc_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_send_packet,
	.recvmsg	= tipc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

static const struct proto_ops stream_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= tipc_release,
	.bind		= tipc_bind,
	.connect	= tipc_connect,
	.socketpair	= tipc_socketpair,
	.accept		= tipc_accept,
	.getname	= tipc_getname,
	.poll		= tipc_poll,
	.ioctl		= tipc_ioctl,
	.listen		= tipc_listen,
	.shutdown	= tipc_shutdown,
	.setsockopt	= tipc_setsockopt,
	.getsockopt	= tipc_getsockopt,
	.sendmsg	= tipc_sendstream,
	.recvmsg	= tipc_recvstream,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage
};

static const struct net_proto_family tipc_family_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.create		= tipc_sk_create
};

static struct proto tipc_proto = {
	.name		= "TIPC",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct tipc_sock),
	.sysctl_rmem	= sysctl_tipc_rmem
};

/**
 * tipc_socket_init - initialize TIPC socket interface
 *
 * Returns 0 on success, errno otherwise
 */
int tipc_socket_init(void)
{
	int res;

	res = proto_register(&tipc_proto, 1);
	if (res) {
		pr_err("Failed to register TIPC protocol type\n");
		goto out;
	}

	res = sock_register(&tipc_family_ops);
	if (res) {
		pr_err("Failed to register TIPC socket type\n");
		proto_unregister(&tipc_proto);
		goto out;
	}
 out:
	return res;
}

/**
 * tipc_socket_stop - stop TIPC socket interface
 */
void tipc_socket_stop(void)
{
	sock_unregister(tipc_family_ops.family);
	proto_unregister(&tipc_proto);
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
{
	u32 peer_node;
	u32 peer_port;
	struct nlattr *nest;

	peer_node = tsk_peer_node(tsk);
	peer_port = tsk_peer_port(tsk);

	nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON);

	if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node))
		goto msg_full;
	if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
		goto msg_full;

	if (tsk->conn_type != 0) {
		if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
			goto msg_full;
		if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
			goto msg_full;
	}
	nla_nest_end(skb, nest);

	return 0;

msg_full:
	nla_nest_cancel(skb, nest);

	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tipc_sock *tsk)
{
	int err;
	void *hdr;
	struct nlattr *attrs;
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	struct sock *sk = &tsk->sk;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
	if (!attrs)
		goto genlmsg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
		goto attr_msg_cancel;

	if (tipc_sk_connected(sk)) {
		err = __tipc_nl_add_sk_con(skb, tsk);
		if (err)
			goto attr_msg_cancel;
	} else if (!list_empty(&tsk->publications)) {
		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
			goto attr_msg_cancel;
	}
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	struct tipc_sock *tsk;
	const struct bucket_table *tbl;
	struct rhash_head *pos;
	struct net *net = sock_net(skb->sk);
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	u32 tbl_id = cb->args[0];
	u32 prev_portid = cb->args[1];

	rcu_read_lock();
	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
	for (; tbl_id < tbl->size; tbl_id++) {
		rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) {
			spin_lock_bh(&tsk->sk.sk_lock.slock);
			if (prev_portid && prev_portid != tsk->portid) {
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				continue;
			}

			err = __tipc_nl_add_sk(skb, cb, tsk);
			if (err) {
				prev_portid = tsk->portid;
				spin_unlock_bh(&tsk->sk.sk_lock.slock);
				goto out;
			}
			prev_portid = 0;
			spin_unlock_bh(&tsk->sk.sk_lock.slock);
		}
	}
out:
	rcu_read_unlock();
	cb->args[0] = tbl_id;
	cb->args[1] = prev_portid;

	return skb->len;
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 struct publication *publ)
{
	void *hdr;
	struct nlattr *attrs;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET);
	if (!hdr)
		goto msg_cancel;

	attrs = nla_nest_start(skb, TIPC_NLA_PUBL);
	if (!attrs)
		goto genlmsg_cancel;

	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
		goto attr_msg_cancel;
	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
		goto attr_msg_cancel;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, hdr);

	return 0;

attr_msg_cancel:
	nla_nest_cancel(skb, attrs);
genlmsg_cancel:
	genlmsg_cancel(skb, hdr);
msg_cancel:
	return -EMSGSIZE;
}

/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
				  struct netlink_callback *cb,
				  struct tipc_sock *tsk, u32 *last_publ)
{
	int err;
	struct publication *p;

	if (*last_publ) {
		list_for_each_entry(p, &tsk->publications, pport_list) {
			if (p->key == *last_publ)
				break;
		}
		if (p->key != *last_publ) {
			/* We never set seq or call nl_dump_check_consistent()
			 * this means that setting prev_seq here will cause the
			 * consistence check to fail in the netlink callback
			 * handler. Resulting in the last NLMSG_DONE message
			 * having the NLM_F_DUMP_INTR flag set.
			 */
			cb->prev_seq = 1;
			*last_publ = 0;
			return -EPIPE;
		}
	} else {
		p = list_first_entry(&tsk->publications, struct publication,
				     pport_list);
	}

	list_for_each_entry_from(p, &tsk->publications, pport_list) {
		err = __tipc_nl_add_sk_publ(skb, cb, p);
		if (err) {
			*last_publ = p->key;
			return err;
		}
	}
	*last_publ = 0;

	return 0;
}

int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int err;
	u32 tsk_portid = cb->args[0];
	u32 last_publ = cb->args[1];
	u32 done = cb->args[2];
	struct net *net = sock_net(skb->sk);
	struct tipc_sock *tsk;

	if (!tsk_portid) {
		struct nlattr **attrs;
		struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];

		err = tipc_nlmsg_parse(cb->nlh, &attrs);
		if (err)
			return err;

		if (!attrs[TIPC_NLA_SOCK])
			return -EINVAL;

		err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX,
				       attrs[TIPC_NLA_SOCK],
				       tipc_nl_sock_policy, NULL);
		if (err)
			return err;

		if (!sock[TIPC_NLA_SOCK_REF])
			return -EINVAL;

		tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]);
	}

	if (done)
		return 0;

	tsk = tipc_sk_lookup(net, tsk_portid);
	if (!tsk)
		return -EINVAL;

	lock_sock(&tsk->sk);
	err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ);
	if (!err)
		done = 1;
	release_sock(&tsk->sk);
	sock_put(&tsk->sk);

	cb->args[0] = tsk_portid;
	cb->args[1] = last_publ;
	cb->args[2] = done;

	return skb->len;
}