/*
 * inet_diag.c	Module for monitoring INET transport protocols sockets.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

12
#include <linux/kernel.h>
L
Linus Torvalds 已提交
13 14 15 16
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/random.h>
17
#include <linux/slab.h>
L
Linus Torvalds 已提交
18 19 20 21 22 23 24 25
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/time.h>

#include <net/icmp.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
26 27 28 29
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/inet6_hashtables.h>
30
#include <net/netlink.h>
L
Linus Torvalds 已提交
31 32 33 34

#include <linux/inet.h>
#include <linux/stddef.h>

35
#include <linux/inet_diag.h>
P
Pavel Emelyanov 已提交
36
#include <linux/sock_diag.h>
L
Linus Torvalds 已提交
37

38 39
static const struct inet_diag_handler **inet_diag_table;

40
struct inet_diag_entry {
E
Eric Dumazet 已提交
41 42
	const __be32 *saddr;
	const __be32 *daddr;
L
Linus Torvalds 已提交
43 44 45 46
	u16 sport;
	u16 dport;
	u16 family;
	u16 userlocks;
47
	u32 ifindex;
48
	u32 mark;
L
Linus Torvalds 已提交
49 50
};

51 52
static DEFINE_MUTEX(inet_diag_table_mutex);

53
static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
54
{
55
	if (!inet_diag_table[proto])
56 57
		request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
			       NETLINK_SOCK_DIAG, AF_INET, proto);
58 59

	mutex_lock(&inet_diag_table_mutex);
60
	if (!inet_diag_table[proto])
61 62
		return ERR_PTR(-ENOENT);

63
	return inet_diag_table[proto];
64 65
}

E
Eric Dumazet 已提交
66
static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
67 68 69 70
{
	mutex_unlock(&inet_diag_table_mutex);
}

71
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
{
	r->idiag_family = sk->sk_family;

	r->id.idiag_sport = htons(sk->sk_num);
	r->id.idiag_dport = sk->sk_dport;
	r->id.idiag_if = sk->sk_bound_dev_if;
	sock_diag_save_cookie(sk, r->id.idiag_cookie);

#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
		*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
	} else
#endif
	{
	memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
	memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));

	r->id.idiag_src[0] = sk->sk_rcv_saddr;
	r->id.idiag_dst[0] = sk->sk_daddr;
	}
}
94
EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);
95

96 97 98 99 100 101
static size_t inet_sk_attr_size(void)
{
	return	  nla_total_size(sizeof(struct tcp_info))
		+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
		+ nla_total_size(1) /* INET_DIAG_TOS */
		+ nla_total_size(1) /* INET_DIAG_TCLASS */
102
		+ nla_total_size(4) /* INET_DIAG_MARK */
103 104 105 106 107 108 109 110
		+ nla_total_size(sizeof(struct inet_diag_meminfo))
		+ nla_total_size(sizeof(struct inet_diag_msg))
		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
		+ nla_total_size(TCP_CA_NAME_MAX)
		+ nla_total_size(sizeof(struct tcpvegas_info))
		+ 64;
}

111 112
/*
 * Append the attributes shared by all full sockets to @skb and fill the
 * uid/inode fields of @r.
 *
 * @ext:       bitmask of requested extensions (bit INET_DIAG_x - 1)
 * @user_ns:   namespace used to translate the socket owner's uid
 * @net_admin: when true, INET_DIAG_MARK is emitted as well
 *
 * Returns 0 on success and 1 as soon as any attribute cannot be added.
 */
int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
			     struct inet_diag_msg *r, int ext,
			     struct user_namespace *user_ns,
			     bool net_admin)
{
	const struct inet_sock *inet = inet_sk(sk);

	if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
		return 1;

	/* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
	 * hence this needs to be included regardless of socket family.
	 */
	if ((ext & (1 << (INET_DIAG_TOS - 1))) &&
	    nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0)
		return 1;

#if IS_ENABLED(CONFIG_IPV6)
	if (r->idiag_family == AF_INET6) {
		if ((ext & (1 << (INET_DIAG_TCLASS - 1))) &&
		    nla_put_u8(skb, INET_DIAG_TCLASS,
			       inet6_sk(sk)->tclass) < 0)
			return 1;

		/* v6-only flag is reported for LISTEN and CLOSE states only */
		if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
		    nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk)))
			return 1;
	}
#endif

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
		return 1;

	r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
	r->idiag_inode = sock_i_ino(sk);

	return 0;
}
EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);

153
/*
 * Build one diag reply for a full socket and append it to @skb.
 *
 * @sk:          socket being reported; must be a full socket (BUG otherwise)
 * @icsk:        connection-sock view of @sk, or NULL when there is none; with
 *               NULL only the common part, the generic attributes and the
 *               handler's idiag_get_info() output are emitted
 * @req:         request; selects the handler (sdiag_protocol) and the optional
 *               extensions (idiag_ext)
 * @user_ns:     namespace used for uid translation
 * @portid/@seq/@nlmsg_flags/@unlh: netlink header parameters of the reply
 * @net_admin:   whether privileged attributes may be included
 *
 * Returns 0 on success, -EMSGSIZE when the message does not fit; in that case
 * the partially built message is cancelled.
 */
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
		      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
		      struct user_namespace *user_ns,
		      u32 portid, u32 seq, u16 nlmsg_flags,
		      const struct nlmsghdr *unlh,
		      bool net_admin)
{
	const struct tcp_congestion_ops *ca_ops;
	const struct inet_diag_handler *handler;
	int ext = req->idiag_ext;
	struct inet_diag_msg *r;
	struct nlmsghdr  *nlh;
	struct nlattr *attr;
	void *info = NULL;

	handler = inet_diag_table[req->sdiag_protocol];
	BUG_ON(!handler);

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(!sk_fullsock(sk));

	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = sk->sk_state;
	r->idiag_timer = 0;
	r->idiag_retrans = 0;
	/* The !icsk path below never touches idiag_expires; set it here so
	 * the field is always written before the message goes to user space.
	 */
	r->idiag_expires = 0;

	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
		goto errout;

	if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
		struct inet_diag_meminfo minfo = {
			.idiag_rmem = sk_rmem_alloc_get(sk),
			.idiag_wmem = sk->sk_wmem_queued,
			.idiag_fmem = sk->sk_forward_alloc,
			.idiag_tmem = sk_wmem_alloc_get(sk),
		};

		if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0)
			goto errout;
	}

	if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
		if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
			goto errout;

	/*
	 * RAW sockets might have user-defined protocols assigned,
	 * so report the one supplied on socket creation.
	 */
	if (sk->sk_type == SOCK_RAW) {
		if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))
			goto errout;
	}

	if (!icsk) {
		handler->idiag_get_info(sk, r, NULL);
		goto out;
	}

	/* Report which timer is armed; when none matches, the zero defaults
	 * set above stay in place.
	 */
	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		r->idiag_timer = 1;
		r->idiag_retrans = icsk->icsk_retransmits;
		r->idiag_expires =
			jiffies_to_msecs(icsk->icsk_timeout - jiffies);
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		r->idiag_timer = 4;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires =
			jiffies_to_msecs(icsk->icsk_timeout - jiffies);
	} else if (timer_pending(&sk->sk_timer)) {
		r->idiag_timer = 2;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires =
			jiffies_to_msecs(sk->sk_timer.expires - jiffies);
	}

	/* Reserve room for the protocol info now; the handler fills it in
	 * through idiag_get_info() further down.
	 */
	if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
		attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
					 handler->idiag_info_size,
					 INET_DIAG_PAD);
		if (!attr)
			goto errout;

		info = nla_data(attr);
	}

	if (ext & (1 << (INET_DIAG_CONG - 1))) {
		int err = 0;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops)
			err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
		rcu_read_unlock();
		if (err < 0)
			goto errout;
	}

	handler->idiag_get_info(sk, r, info);

	if (sk->sk_state < TCP_TIME_WAIT) {
		union tcp_cc_info cc_info;
		size_t sz = 0;
		int cc_attr;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops && ca_ops->get_info)
			sz = ca_ops->get_info(sk, ext, &cc_attr, &cc_info);
		rcu_read_unlock();
		/* cc_attr is only read when get_info() reported a size */
		if (sz && nla_put(skb, cc_attr, sz, &cc_info) < 0)
			goto errout;
	}

out:
	nlmsg_end(skb, nlh);
	return 0;

errout:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(inet_sk_diag_fill);

/*
 * Convenience wrapper: report @sk through inet_sk_diag_fill(), passing
 * inet_csk(sk) as its connection-sock view.
 */
static int inet_csk_diag_fill(struct sock *sk,
			      struct sk_buff *skb,
			      const struct inet_diag_req_v2 *req,
			      struct user_namespace *user_ns,
			      u32 portid, u32 seq, u16 nlmsg_flags,
			      const struct nlmsghdr *unlh,
			      bool net_admin)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	return inet_sk_diag_fill(sk, icsk, skb, req, user_ns,
				 portid, seq, nlmsg_flags, unlh, net_admin);
}
L
Linus Torvalds 已提交
298

E
Eric Dumazet 已提交
299
/*
 * Build the diag reply for a TIME_WAIT socket.
 *
 * Only the message header is emitted (no attributes). The reported state is
 * tw_substate, the timer type is 3 and the expiry is the time left on
 * tw_timer, clamped at zero. Queue sizes, uid and inode are reported as 0.
 *
 * Returns 0 on success or -EMSGSIZE when the header does not fit in @skb.
 */
static int inet_twsk_diag_fill(struct sock *sk,
			       struct sk_buff *skb,
			       u32 portid, u32 seq, u16 nlmsg_flags,
			       const struct nlmsghdr *unlh)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct inet_diag_msg *msg;
	struct nlmsghdr *nlh;
	long remaining;

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*msg),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	msg = nlmsg_data(nlh);
	BUG_ON(tw->tw_state != TCP_TIME_WAIT);

	/* A timer that already expired is reported as 0 ms remaining */
	remaining = tw->tw_timer.expires - jiffies;
	if (remaining < 0)
		remaining = 0;

	inet_diag_msg_common_fill(msg, sk);

	msg->idiag_state   = tw->tw_substate;
	msg->idiag_timer   = 3;
	msg->idiag_retrans = 0;
	msg->idiag_expires = jiffies_to_msecs(remaining);
	msg->idiag_rqueue  = 0;
	msg->idiag_wqueue  = 0;
	msg->idiag_uid     = 0;
	msg->idiag_inode   = 0;

	nlmsg_end(skb, nlh);
	return 0;
}

336 337
/*
 * Build the diag reply for a request socket (reported as TCP_SYN_RECV).
 *
 * The timer type is 1, idiag_retrans carries the request's num_retrans and
 * the expiry is the time left on rsk_timer (0 once it has passed). Queue
 * sizes, uid and inode are reported as 0. When @net_admin is set the
 * request's mark is appended as INET_DIAG_MARK.
 *
 * Returns 0 on success or -EMSGSIZE when the message does not fit; a message
 * that was started but could not be completed is cancelled so that @skb
 * carries no half-built entry.
 */
static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
			      u32 portid, u32 seq, u16 nlmsg_flags,
			      const struct nlmsghdr *unlh, bool net_admin)
{
	struct request_sock *reqsk = inet_reqsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = TCP_SYN_RECV;
	r->idiag_timer = 1;
	r->idiag_retrans = reqsk->num_retrans;

	/* inet_diag_msg_common_fill() saved the cookie through sk_cookie;
	 * make sure that aliases the request sock's ir_cookie.
	 */
	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
		     offsetof(struct sock, sk_cookie));

	tmo = reqsk->rsk_timer.expires - jiffies;
	r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0;
	r->idiag_rqueue	= 0;
	r->idiag_wqueue	= 0;
	r->idiag_uid	= 0;
	r->idiag_inode	= 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     inet_rsk(reqsk)->ir_mark)) {
		/* Undo the header already placed in the skb */
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

374
/*
 * Dispatch on the socket state: TIME_WAIT and NEW_SYN_RECV entries have
 * their own fill routines, every other state is handled by
 * inet_csk_diag_fill().
 */
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
			const struct inet_diag_req_v2 *r,
			struct user_namespace *user_ns,
			u32 portid, u32 seq, u16 nlmsg_flags,
			const struct nlmsghdr *unlh, bool net_admin)
{
	switch (sk->sk_state) {
	case TCP_TIME_WAIT:
		return inet_twsk_diag_fill(sk, skb, portid, seq,
					   nlmsg_flags, unlh);
	case TCP_NEW_SYN_RECV:
		return inet_req_diag_fill(sk, skb, portid, seq,
					  nlmsg_flags, unlh, net_admin);
	default:
		return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
					  nlmsg_flags, unlh, net_admin);
	}
}

392 393 394
struct sock *inet_diag_find_one_icsk(struct net *net,
				     struct inet_hashinfo *hashinfo,
				     const struct inet_diag_req_v2 *req)
L
Linus Torvalds 已提交
395
{
E
Eric Dumazet 已提交
396
	struct sock *sk;
397

398
	rcu_read_lock();
E
Eric Dumazet 已提交
399
	if (req->sdiag_family == AF_INET)
400
		sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
401 402
				 req->id.idiag_dport, req->id.idiag_src[0],
				 req->id.idiag_sport, req->id.idiag_if);
E
Eric Dumazet 已提交
403
#if IS_ENABLED(CONFIG_IPV6)
404 405 406
	else if (req->sdiag_family == AF_INET6) {
		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
407
			sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
408 409 410
					 req->id.idiag_dport, req->id.idiag_src[3],
					 req->id.idiag_sport, req->id.idiag_if);
		else
411
			sk = inet6_lookup(net, hashinfo, NULL, 0,
412 413 414 415 416 417
					  (struct in6_addr *)req->id.idiag_dst,
					  req->id.idiag_dport,
					  (struct in6_addr *)req->id.idiag_src,
					  req->id.idiag_sport,
					  req->id.idiag_if);
	}
L
Linus Torvalds 已提交
418
#endif
419 420
	else {
		rcu_read_unlock();
421
		return ERR_PTR(-EINVAL);
422 423
	}
	rcu_read_unlock();
E
Eric Dumazet 已提交
424
	if (!sk)
425
		return ERR_PTR(-ENOENT);
L
Linus Torvalds 已提交
426

427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448
	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
		sock_gen_put(sk);
		return ERR_PTR(-ENOENT);
	}

	return sk;
}
EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);

int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
			    struct sk_buff *in_skb,
			    const struct nlmsghdr *nlh,
			    const struct inet_diag_req_v2 *req)
{
	struct net *net = sock_net(in_skb->sk);
	struct sk_buff *rep;
	struct sock *sk;
	int err;

	sk = inet_diag_find_one_icsk(net, hashinfo, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);
L
Linus Torvalds 已提交
449

450
	rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
451 452
	if (!rep) {
		err = -ENOMEM;
L
Linus Torvalds 已提交
453
		goto out;
454
	}
L
Linus Torvalds 已提交
455

456
	err = sk_diag_fill(sk, rep, req,
457
			   sk_user_ns(NETLINK_CB(in_skb).sk),
458
			   NETLINK_CB(in_skb).portid,
459 460
			   nlh->nlmsg_seq, 0, nlh,
			   netlink_net_capable(in_skb, CAP_NET_ADMIN));
461 462
	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
463
		nlmsg_free(rep);
464 465
		goto out;
	}
466
	err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
467
			      MSG_DONTWAIT);
L
Linus Torvalds 已提交
468 469 470 471
	if (err > 0)
		err = 0;

out:
E
Eric Dumazet 已提交
472 473 474
	if (sk)
		sock_gen_put(sk);

475 476
	return err;
}
477
EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
478

479
static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
480
			       const struct nlmsghdr *nlh,
481
			       const struct inet_diag_req_v2 *req)
482 483 484 485 486 487 488
{
	const struct inet_diag_handler *handler;
	int err;

	handler = inet_diag_lock_handler(req->sdiag_protocol);
	if (IS_ERR(handler))
		err = PTR_ERR(handler);
489
	else if (cmd == SOCK_DIAG_BY_FAMILY)
490
		err = handler->dump_one(in_skb, nlh, req);
491 492 493 494
	else if (cmd == SOCK_DESTROY && handler->destroy)
		err = handler->destroy(in_skb, req);
	else
		err = -EOPNOTSUPP;
495
	inet_diag_unlock_handler(handler);
496

L
Linus Torvalds 已提交
497 498 499
	return err;
}

A
Al Viro 已提交
500
/*
 * Compare the leading @bits bits of two big-endian word arrays.
 *
 * Whole 32-bit words are compared with memcmp(); a trailing partial word is
 * compared under a mask covering its top (bits % 32) bits.
 *
 * Returns 1 when the prefixes are equal, 0 otherwise.
 */
static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
{
	int full_words = bits >> 5;
	int tail_bits = bits & 0x1f;

	if (full_words && memcmp(a1, a2, full_words << 2))
		return 0;

	if (tail_bits) {
		__be32 mask = htonl((0xffffffff) << (32 - tail_bits));
		__be32 diff = a1[full_words] ^ a2[full_words];

		if (diff & mask)
			return 0;
	}

	return 1;
}

526
static int inet_diag_bc_run(const struct nlattr *_bc,
E
Eric Dumazet 已提交
527
			    const struct inet_diag_entry *entry)
L
Linus Torvalds 已提交
528
{
529 530 531
	const void *bc = nla_data(_bc);
	int len = nla_len(_bc);

L
Linus Torvalds 已提交
532 533
	while (len > 0) {
		int yes = 1;
534
		const struct inet_diag_bc_op *op = bc;
L
Linus Torvalds 已提交
535 536

		switch (op->code) {
537
		case INET_DIAG_BC_NOP:
L
Linus Torvalds 已提交
538
			break;
539
		case INET_DIAG_BC_JMP:
L
Linus Torvalds 已提交
540 541
			yes = 0;
			break;
542
		case INET_DIAG_BC_S_GE:
L
Linus Torvalds 已提交
543 544
			yes = entry->sport >= op[1].no;
			break;
545
		case INET_DIAG_BC_S_LE:
546
			yes = entry->sport <= op[1].no;
L
Linus Torvalds 已提交
547
			break;
548
		case INET_DIAG_BC_D_GE:
L
Linus Torvalds 已提交
549 550
			yes = entry->dport >= op[1].no;
			break;
551
		case INET_DIAG_BC_D_LE:
L
Linus Torvalds 已提交
552 553
			yes = entry->dport <= op[1].no;
			break;
554
		case INET_DIAG_BC_AUTO:
L
Linus Torvalds 已提交
555 556
			yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
			break;
557
		case INET_DIAG_BC_S_COND:
558
		case INET_DIAG_BC_D_COND: {
E
Eric Dumazet 已提交
559 560
			const struct inet_diag_hostcond *cond;
			const __be32 *addr;
L
Linus Torvalds 已提交
561

E
Eric Dumazet 已提交
562
			cond = (const struct inet_diag_hostcond *)(op + 1);
L
Linus Torvalds 已提交
563
			if (cond->port != -1 &&
564
			    cond->port != (op->code == INET_DIAG_BC_S_COND ?
L
Linus Torvalds 已提交
565 566 567 568
					     entry->sport : entry->dport)) {
				yes = 0;
				break;
			}
569

570
			if (op->code == INET_DIAG_BC_S_COND)
L
Linus Torvalds 已提交
571 572 573 574
				addr = entry->saddr;
			else
				addr = entry->daddr;

575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591
			if (cond->family != AF_UNSPEC &&
			    cond->family != entry->family) {
				if (entry->family == AF_INET6 &&
				    cond->family == AF_INET) {
					if (addr[0] == 0 && addr[1] == 0 &&
					    addr[2] == htonl(0xffff) &&
					    bitstring_match(addr + 3,
							    cond->addr,
							    cond->prefix_len))
						break;
				}
				yes = 0;
				break;
			}

			if (cond->prefix_len == 0)
				break;
592 593
			if (bitstring_match(addr, cond->addr,
					    cond->prefix_len))
L
Linus Torvalds 已提交
594 595 596 597
				break;
			yes = 0;
			break;
		}
598 599 600 601 602 603 604 605
		case INET_DIAG_BC_DEV_COND: {
			u32 ifindex;

			ifindex = *((const u32 *)(op + 1));
			if (ifindex != entry->ifindex)
				yes = 0;
			break;
		}
606 607 608 609 610 611 612 613
		case INET_DIAG_BC_MARK_COND: {
			struct inet_diag_markcond *cond;

			cond = (struct inet_diag_markcond *)(op + 1);
			if ((entry->mark & cond->mask) != cond->mark)
				yes = 0;
			break;
		}
L
Linus Torvalds 已提交
614 615
		}

616
		if (yes) {
L
Linus Torvalds 已提交
617 618 619 620 621 622 623
			len -= op->yes;
			bc += op->yes;
		} else {
			len -= op->no;
			bc += op->no;
		}
	}
E
Eric Dumazet 已提交
624
	return len == 0;
L
Linus Torvalds 已提交
625 626
}

627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
/* Point @entry at the socket's address words. Usable with every socket
 * flavour (ESTABLISH, TIMEWAIT, SYN_RECV).
 */
static void entry_fill_addrs(struct inet_diag_entry *entry,
			     const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32;
		entry->daddr = sk->sk_v6_daddr.s6_addr32;
		return;
	}
#endif
	entry->saddr = &sk->sk_rcv_saddr;
	entry->daddr = &sk->sk_daddr;
}

644 645 646
int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
E
Eric Dumazet 已提交
647
	struct inet_diag_entry entry;
648

E
Eric Dumazet 已提交
649
	if (!bc)
650 651 652
		return 1;

	entry.family = sk->sk_family;
653
	entry_fill_addrs(&entry, sk);
654 655
	entry.sport = inet->inet_num;
	entry.dport = ntohs(inet->inet_dport);
656
	entry.ifindex = sk->sk_bound_dev_if;
657
	entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
658 659 660 661 662 663
	if (sk_fullsock(sk))
		entry.mark = sk->sk_mark;
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
	else
		entry.mark = 0;
664 665 666 667 668

	return inet_diag_bc_run(bc, &entry);
}
EXPORT_SYMBOL_GPL(inet_diag_bc_sk);

L
Linus Torvalds 已提交
669 670 671
static int valid_cc(const void *bc, int len, int cc)
{
	while (len >= 0) {
672
		const struct inet_diag_bc_op *op = bc;
L
Linus Torvalds 已提交
673 674 675 676 677

		if (cc > len)
			return 0;
		if (cc == len)
			return 1;
678
		if (op->yes < 4 || op->yes & 3)
L
Linus Torvalds 已提交
679 680 681 682 683 684 685
			return 0;
		len -= op->yes;
		bc  += op->yes;
	}
	return 0;
}

686 687 688 689 690 691 692 693 694 695 696
/* Validate a device condition: the operand is a u32 ifindex, so the
 * instruction needs sizeof(u32) more bytes than the bare op. *min_len is
 * grown accordingly, whether or not the check passes.
 */
static bool valid_devcond(const struct inet_diag_bc_op *op, int len,
			  int *min_len)
{
	*min_len += sizeof(u32);

	return len >= *min_len;
}
697 698 699 700 701
/* Validate an inet_diag_hostcond. */
static bool valid_hostcond(const struct inet_diag_bc_op *op, int len,
			   int *min_len)
{
	struct inet_diag_hostcond *cond;
E
Eric Dumazet 已提交
702
	int addr_len;
703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734

	/* Check hostcond space. */
	*min_len += sizeof(struct inet_diag_hostcond);
	if (len < *min_len)
		return false;
	cond = (struct inet_diag_hostcond *)(op + 1);

	/* Check address family and address length. */
	switch (cond->family) {
	case AF_UNSPEC:
		addr_len = 0;
		break;
	case AF_INET:
		addr_len = sizeof(struct in_addr);
		break;
	case AF_INET6:
		addr_len = sizeof(struct in6_addr);
		break;
	default:
		return false;
	}
	*min_len += addr_len;
	if (len < *min_len)
		return false;

	/* Check prefix length (in bits) vs address length (in bytes). */
	if (cond->prefix_len > 8 * addr_len)
		return false;

	return true;
}

735
/* Validate a port comparison operator. */
E
Eric Dumazet 已提交
736 737
static bool valid_port_comparison(const struct inet_diag_bc_op *op,
				  int len, int *min_len)
738 739 740 741 742 743 744 745
{
	/* Port comparisons put the port in a follow-on inet_diag_bc_op. */
	*min_len += sizeof(struct inet_diag_bc_op);
	if (len < *min_len)
		return false;
	return true;
}

746 747
static bool valid_markcond(const struct inet_diag_bc_op *op, int len,
			   int *min_len)
L
Linus Torvalds 已提交
748
{
749 750 751 752 753 754 755 756
	*min_len += sizeof(struct inet_diag_markcond);
	return len >= *min_len;
}

static int inet_diag_bc_audit(const struct nlattr *attr,
			      const struct sk_buff *skb)
{
	bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
757 758 759 760 761 762 763 764
	const void *bytecode, *bc;
	int bytecode_len, len;

	if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
		return -EINVAL;

	bytecode = bc = nla_data(attr);
	len = bytecode_len = nla_len(attr);
L
Linus Torvalds 已提交
765 766

	while (len > 0) {
767
		int min_len = sizeof(struct inet_diag_bc_op);
E
Eric Dumazet 已提交
768
		const struct inet_diag_bc_op *op = bc;
L
Linus Torvalds 已提交
769 770

		switch (op->code) {
771 772
		case INET_DIAG_BC_S_COND:
		case INET_DIAG_BC_D_COND:
773 774
			if (!valid_hostcond(bc, len, &min_len))
				return -EINVAL;
775
			break;
776 777 778 779
		case INET_DIAG_BC_DEV_COND:
			if (!valid_devcond(bc, len, &min_len))
				return -EINVAL;
			break;
780 781 782 783
		case INET_DIAG_BC_S_GE:
		case INET_DIAG_BC_S_LE:
		case INET_DIAG_BC_D_GE:
		case INET_DIAG_BC_D_LE:
784
			if (!valid_port_comparison(bc, len, &min_len))
L
Linus Torvalds 已提交
785 786
				return -EINVAL;
			break;
787 788 789 790 791 792
		case INET_DIAG_BC_MARK_COND:
			if (!net_admin)
				return -EPERM;
			if (!valid_markcond(bc, len, &min_len))
				return -EINVAL;
			break;
793 794
		case INET_DIAG_BC_AUTO:
		case INET_DIAG_BC_JMP:
795
		case INET_DIAG_BC_NOP:
L
Linus Torvalds 已提交
796 797 798 799
			break;
		default:
			return -EINVAL;
		}
800 801 802 803 804 805 806 807 808

		if (op->code != INET_DIAG_BC_NOP) {
			if (op->no < min_len || op->no > len + 4 || op->no & 3)
				return -EINVAL;
			if (op->no < len &&
			    !valid_cc(bytecode, bytecode_len, len - op->no))
				return -EINVAL;
		}

809
		if (op->yes < min_len || op->yes > len + 4 || op->yes & 3)
810
			return -EINVAL;
811
		bc  += op->yes;
L
Linus Torvalds 已提交
812 813 814 815 816
		len -= op->yes;
	}
	return len == 0 ? 0 : -EINVAL;
}

817 818
/*
 * Dump helper for one connection socket: sockets rejected by the filter are
 * skipped (return 0), the rest are appended to @skb as a multi-part reply
 * addressed to the requester recorded in @cb.
 */
static int inet_csk_diag_dump(struct sock *sk,
			      struct sk_buff *skb,
			      struct netlink_callback *cb,
			      const struct inet_diag_req_v2 *r,
			      const struct nlattr *bc,
			      bool net_admin)
{
	if (inet_diag_bc_sk(bc, sk))
		return inet_csk_diag_fill(sk, skb, r,
					  sk_user_ns(NETLINK_CB(cb->skb).sk),
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  cb->nlh, net_admin);

	return 0;
}

834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859
/* Build-time check that a timewait-sock field sits at the same offset as the
 * given field of another socket structure.
 */
#define TWSK_SAME_OFFSET(_tw_field, _type, _field)			\
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, _tw_field) !=	\
		     offsetof(_type, _field))

/*
 * Compile-time layout checks only; generates no run-time work. They assert
 * that the family, port and address fields of a timewait sock share offsets
 * with the corresponding sock / inet_sock fields.
 */
static void twsk_build_assert(void)
{
	TWSK_SAME_OFFSET(tw_family, struct sock, sk_family);
	TWSK_SAME_OFFSET(tw_num, struct inet_sock, inet_num);
	TWSK_SAME_OFFSET(tw_dport, struct inet_sock, inet_dport);
	TWSK_SAME_OFFSET(tw_rcv_saddr, struct inet_sock, inet_rcv_saddr);
	TWSK_SAME_OFFSET(tw_daddr, struct inet_sock, inet_daddr);

#if IS_ENABLED(CONFIG_IPV6)
	TWSK_SAME_OFFSET(tw_v6_rcv_saddr, struct sock, sk_v6_rcv_saddr);
	TWSK_SAME_OFFSET(tw_v6_daddr, struct sock, sk_v6_daddr);
#endif
}

#undef TWSK_SAME_OFFSET

860
void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
E
Eric Dumazet 已提交
861
			 struct netlink_callback *cb,
862
			 const struct inet_diag_req_v2 *r, struct nlattr *bc)
L
Linus Torvalds 已提交
863
{
864
	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
A
Andrey Vagin 已提交
865
	struct net *net = sock_net(skb->sk);
866
	u32 idiag_states = r->idiag_states;
867 868
	int i, num, s_i, s_num;
	struct sock *sk;
869

870 871
	if (idiag_states & TCPF_SYN_RECV)
		idiag_states |= TCPF_NEW_SYN_RECV;
L
Linus Torvalds 已提交
872 873
	s_i = cb->args[1];
	s_num = num = cb->args[2];
874

L
Linus Torvalds 已提交
875
	if (cb->args[0] == 0) {
876
		if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
L
Linus Torvalds 已提交
877
			goto skip_listen_ht;
878

879
		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
880
			struct inet_listen_hashbucket *ilb;
L
Linus Torvalds 已提交
881 882

			num = 0;
883
			ilb = &hashinfo->listening_hash[i];
884
			spin_lock(&ilb->lock);
885
			sk_for_each(sk, &ilb->head) {
L
Linus Torvalds 已提交
886 887
				struct inet_sock *inet = inet_sk(sk);

A
Andrey Vagin 已提交
888 889 890
				if (!net_eq(sock_net(sk), net))
					continue;

L
Linus Torvalds 已提交
891 892 893 894 895
				if (num < s_num) {
					num++;
					continue;
				}

896
				if (r->sdiag_family != AF_UNSPEC &&
E
Eric Dumazet 已提交
897
				    sk->sk_family != r->sdiag_family)
898 899
					goto next_listen;

E
Eric Dumazet 已提交
900
				if (r->id.idiag_sport != inet->inet_sport &&
901
				    r->id.idiag_sport)
L
Linus Torvalds 已提交
902 903
					goto next_listen;

904 905
				if (inet_csk_diag_dump(sk, skb, cb, r,
						       bc, net_admin) < 0) {
906
					spin_unlock(&ilb->lock);
L
Linus Torvalds 已提交
907 908 909 910 911 912
					goto done;
				}

next_listen:
				++num;
			}
913
			spin_unlock(&ilb->lock);
L
Linus Torvalds 已提交
914 915 916 917 918 919 920 921

			s_num = 0;
		}
skip_listen_ht:
		cb->args[0] = 1;
		s_i = num = s_num = 0;
	}

922
	if (!(idiag_states & ~TCPF_LISTEN))
923
		goto out;
L
Linus Torvalds 已提交
924

925
#define SKARR_SZ 16
926
	for (i = s_i; i <= hashinfo->ehash_mask; i++) {
927
		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
928
		spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
929
		struct hlist_nulls_node *node;
930 931 932
		struct sock *sk_arr[SKARR_SZ];
		int num_arr[SKARR_SZ];
		int idx, accum, res;
933

E
Eric Dumazet 已提交
934
		if (hlist_nulls_empty(&head->chain))
935 936
			continue;

L
Linus Torvalds 已提交
937 938 939
		if (i > s_i)
			s_num = 0;

940 941 942
next_chunk:
		num = 0;
		accum = 0;
943
		spin_lock_bh(lock);
944
		sk_nulls_for_each(sk, node, &head->chain) {
945
			int state;
L
Linus Torvalds 已提交
946

A
Andrey Vagin 已提交
947 948
			if (!net_eq(sock_net(sk), net))
				continue;
L
Linus Torvalds 已提交
949 950
			if (num < s_num)
				goto next_normal;
951 952
			state = (sk->sk_state == TCP_TIME_WAIT) ?
				inet_twsk(sk)->tw_substate : sk->sk_state;
953
			if (!(idiag_states & (1 << state)))
L
Linus Torvalds 已提交
954
				goto next_normal;
955
			if (r->sdiag_family != AF_UNSPEC &&
E
Eric Dumazet 已提交
956
			    sk->sk_family != r->sdiag_family)
957
				goto next_normal;
E
Eric Dumazet 已提交
958
			if (r->id.idiag_sport != htons(sk->sk_num) &&
959
			    r->id.idiag_sport)
L
Linus Torvalds 已提交
960
				goto next_normal;
E
Eric Dumazet 已提交
961
			if (r->id.idiag_dport != sk->sk_dport &&
962
			    r->id.idiag_dport)
L
Linus Torvalds 已提交
963
				goto next_normal;
964 965 966 967 968
			twsk_build_assert();

			if (!inet_diag_bc_sk(bc, sk))
				goto next_normal;

969 970 971 972 973 974 975 976 977 978 979 980 981
			sock_hold(sk);
			num_arr[accum] = num;
			sk_arr[accum] = sk;
			if (++accum == SKARR_SZ)
				break;
next_normal:
			++num;
		}
		spin_unlock_bh(lock);
		res = 0;
		for (idx = 0; idx < accum; idx++) {
			if (res >= 0) {
				res = sk_diag_fill(sk_arr[idx], skb, r,
982 983 984
					   sk_user_ns(NETLINK_CB(cb->skb).sk),
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
985
					   cb->nlh, net_admin);
986 987
				if (res < 0)
					num = num_arr[idx];
L
Linus Torvalds 已提交
988
			}
989
			sock_gen_put(sk_arr[idx]);
L
Linus Torvalds 已提交
990
		}
991 992
		if (res < 0)
			break;
993
		cond_resched();
994 995 996 997
		if (accum == SKARR_SZ) {
			s_num = num + 1;
			goto next_chunk;
		}
L
Linus Torvalds 已提交
998 999 1000 1001 1002
	}

done:
	cb->args[1] = i;
	cb->args[2] = num;
1003 1004 1005
out:
	;
}
1006
EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
1007 1008

static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
1009
			    const struct inet_diag_req_v2 *r,
E
Eric Dumazet 已提交
1010
			    struct nlattr *bc)
1011 1012
{
	const struct inet_diag_handler *handler;
1013
	int err = 0;
1014 1015 1016

	handler = inet_diag_lock_handler(r->sdiag_protocol);
	if (!IS_ERR(handler))
1017
		handler->dump(skb, cb, r, bc);
1018 1019
	else
		err = PTR_ERR(handler);
1020
	inet_diag_unlock_handler(handler);
1021

1022
	return err ? : skb->len;
L
Linus Torvalds 已提交
1023 1024
}

1025 1026
static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
1027
	int hdrlen = sizeof(struct inet_diag_req_v2);
E
Eric Dumazet 已提交
1028
	struct nlattr *bc = NULL;
1029 1030 1031 1032

	if (nlmsg_attrlen(cb->nlh, hdrlen))
		bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);

1033
	return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc);
1034 1035
}

E
Eric Dumazet 已提交
1036
static int inet_diag_type2proto(int type)
1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
{
	switch (type) {
	case TCPDIAG_GETSOCK:
		return IPPROTO_TCP;
	case DCCPDIAG_GETSOCK:
		return IPPROTO_DCCP;
	default:
		return 0;
	}
}

E
Eric Dumazet 已提交
1048 1049
static int inet_diag_dump_compat(struct sk_buff *skb,
				 struct netlink_callback *cb)
1050
{
1051
	struct inet_diag_req *rc = nlmsg_data(cb->nlh);
E
Eric Dumazet 已提交
1052
	int hdrlen = sizeof(struct inet_diag_req);
1053
	struct inet_diag_req_v2 req;
1054 1055
	struct nlattr *bc = NULL;

1056
	req.sdiag_family = AF_UNSPEC; /* compatibility */
1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067
	req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
	req.idiag_ext = rc->idiag_ext;
	req.idiag_states = rc->idiag_states;
	req.id = rc->id;

	if (nlmsg_attrlen(cb->nlh, hdrlen))
		bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);

	return __inet_diag_dump(skb, cb, &req, bc);
}

1068
static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
E
Eric Dumazet 已提交
1069
				      const struct nlmsghdr *nlh)
1070
{
1071
	struct inet_diag_req *rc = nlmsg_data(nlh);
1072
	struct inet_diag_req_v2 req;
1073 1074 1075 1076 1077 1078 1079

	req.sdiag_family = rc->idiag_family;
	req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
	req.idiag_ext = rc->idiag_ext;
	req.idiag_states = rc->idiag_states;
	req.id = rc->id;

1080
	return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
1081 1082
}

1083
static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
L
Linus Torvalds 已提交
1084
{
1085
	int hdrlen = sizeof(struct inet_diag_req);
A
Andrey Vagin 已提交
1086
	struct net *net = sock_net(skb->sk);
L
Linus Torvalds 已提交
1087

1088 1089 1090
	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
	    nlmsg_len(nlh) < hdrlen)
		return -EINVAL;
L
Linus Torvalds 已提交
1091

1092
	if (nlh->nlmsg_flags & NLM_F_DUMP) {
1093 1094
		if (nlmsg_attrlen(nlh, hdrlen)) {
			struct nlattr *attr;
1095
			int err;
L
Linus Torvalds 已提交
1096

1097 1098
			attr = nlmsg_find_attr(nlh, hdrlen,
					       INET_DIAG_REQ_BYTECODE);
1099
			err = inet_diag_bc_audit(attr, skb);
1100 1101
			if (err)
				return err;
1102
		}
1103 1104 1105 1106
		{
			struct netlink_dump_control c = {
				.dump = inet_diag_dump_compat,
			};
A
Andrey Vagin 已提交
1107
			return netlink_dump_start(net->diag_nlsk, skb, nlh, &c);
1108
		}
L
Linus Torvalds 已提交
1109
	}
1110

1111
	return inet_diag_get_exact_compat(skb, nlh);
L
Linus Torvalds 已提交
1112 1113
}

1114
static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
P
Pavel Emelyanov 已提交
1115
{
1116
	int hdrlen = sizeof(struct inet_diag_req_v2);
A
Andrey Vagin 已提交
1117
	struct net *net = sock_net(skb->sk);
P
Pavel Emelyanov 已提交
1118 1119 1120 1121

	if (nlmsg_len(h) < hdrlen)
		return -EINVAL;

1122 1123
	if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
	    h->nlmsg_flags & NLM_F_DUMP) {
1124 1125
		if (nlmsg_attrlen(h, hdrlen)) {
			struct nlattr *attr;
1126
			int err;
E
Eric Dumazet 已提交
1127

1128 1129
			attr = nlmsg_find_attr(h, hdrlen,
					       INET_DIAG_REQ_BYTECODE);
1130
			err = inet_diag_bc_audit(attr, skb);
1131 1132
			if (err)
				return err;
1133
		}
1134 1135 1136 1137
		{
			struct netlink_dump_control c = {
				.dump = inet_diag_dump,
			};
A
Andrey Vagin 已提交
1138
			return netlink_dump_start(net->diag_nlsk, skb, h, &c);
1139
		}
P
Pavel Emelyanov 已提交
1140 1141
	}

1142
	return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
P
Pavel Emelyanov 已提交
1143 1144
}

1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161
static
int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
{
	const struct inet_diag_handler *handler;
	struct nlmsghdr *nlh;
	struct nlattr *attr;
	struct inet_diag_msg *r;
	void *info = NULL;
	int err = 0;

	nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0);
	if (!nlh)
		return -ENOMEM;

	r = nlmsg_data(nlh);
	memset(r, 0, sizeof(*r));
	inet_diag_msg_common_fill(r, sk);
1162 1163
	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_STREAM)
		r->id.idiag_sport = inet_sk(sk)->inet_sport;
1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178
	r->idiag_state = sk->sk_state;

	if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) {
		nlmsg_cancel(skb, nlh);
		return err;
	}

	handler = inet_diag_lock_handler(sk->sk_protocol);
	if (IS_ERR(handler)) {
		inet_diag_unlock_handler(handler);
		nlmsg_cancel(skb, nlh);
		return PTR_ERR(handler);
	}

	attr = handler->idiag_info_size
1179 1180 1181
		? nla_reserve_64bit(skb, INET_DIAG_INFO,
				    handler->idiag_info_size,
				    INET_DIAG_PAD)
1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192
		: NULL;
	if (attr)
		info = nla_data(attr);

	handler->idiag_get_info(sk, r, info);
	inet_diag_unlock_handler(handler);

	nlmsg_end(skb, nlh);
	return 0;
}

1193
static const struct sock_diag_handler inet_diag_handler = {
P
Pavel Emelyanov 已提交
1194
	.family = AF_INET,
1195
	.dump = inet_diag_handler_cmd,
1196
	.get_info = inet_diag_handler_get_info,
1197
	.destroy = inet_diag_handler_cmd,
P
Pavel Emelyanov 已提交
1198 1199
};

1200
static const struct sock_diag_handler inet6_diag_handler = {
P
Pavel Emelyanov 已提交
1201
	.family = AF_INET6,
1202
	.dump = inet_diag_handler_cmd,
1203
	.get_info = inet_diag_handler_get_info,
1204
	.destroy = inet_diag_handler_cmd,
P
Pavel Emelyanov 已提交
1205 1206
};

1207 1208 1209 1210 1211
int inet_diag_register(const struct inet_diag_handler *h)
{
	const __u16 type = h->idiag_type;
	int err = -EINVAL;

1212
	if (type >= IPPROTO_MAX)
1213 1214
		goto out;

1215
	mutex_lock(&inet_diag_table_mutex);
1216
	err = -EEXIST;
E
Eric Dumazet 已提交
1217
	if (!inet_diag_table[type]) {
1218 1219 1220
		inet_diag_table[type] = h;
		err = 0;
	}
1221
	mutex_unlock(&inet_diag_table_mutex);
1222 1223 1224 1225 1226 1227 1228 1229 1230
out:
	return err;
}
EXPORT_SYMBOL_GPL(inet_diag_register);

void inet_diag_unregister(const struct inet_diag_handler *h)
{
	const __u16 type = h->idiag_type;

1231
	if (type >= IPPROTO_MAX)
1232 1233
		return;

1234
	mutex_lock(&inet_diag_table_mutex);
1235
	inet_diag_table[type] = NULL;
1236
	mutex_unlock(&inet_diag_table_mutex);
1237 1238 1239
}
EXPORT_SYMBOL_GPL(inet_diag_unregister);

1240
/*
 * Module init: allocate the zeroed handler table (one pointer per
 * protocol number below IPPROTO_MAX), register the AF_INET and AF_INET6
 * sock_diag handlers, then install the compat receive hook.
 *
 * Returns 0 on success, -ENOMEM when the table cannot be allocated, or
 * the error from sock_diag_register(); anything set up before the failure
 * is undone.
 */
static int __init inet_diag_init(void)
{
	const int table_bytes = (IPPROTO_MAX *
				 sizeof(struct inet_diag_handler *));
	int rc;

	inet_diag_table = kzalloc(table_bytes, GFP_KERNEL);
	if (!inet_diag_table)
		return -ENOMEM;

	rc = sock_diag_register(&inet_diag_handler);
	if (rc)
		goto err_free_table;

	rc = sock_diag_register(&inet6_diag_handler);
	if (rc)
		goto err_unregister_inet;

	sock_diag_register_inet_compat(inet_diag_rcv_msg_compat);
	return 0;

err_unregister_inet:
	sock_diag_unregister(&inet_diag_handler);
err_free_table:
	kfree(inet_diag_table);
	return rc;
}

1269
/*
 * Module exit: withdraw both sock_diag handlers and the compat receive
 * hook, and only then release the handler table.
 */
static void __exit inet_diag_exit(void)
{
	sock_diag_unregister(&inet6_diag_handler);
	sock_diag_unregister(&inet_diag_handler);
	sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat);

	/* Freed last, after all three registrations are gone. */
	kfree(inet_diag_table);
}

1277 1278
module_init(inet_diag_init);
module_exit(inet_diag_exit);
L
Linus Torvalds 已提交
1279
MODULE_LICENSE("GPL");
1280 1281
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);