// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always <576 bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}


static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	int l3index = 0;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, l3index,
				      cmd.tcpm_key, cmd.tcpm_keylen,
				      GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, l3index,
			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb,
				    int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash, l3index;
	u8 newhash[16];

	/* sdif set, means packet ingressed via a device
	 * in an L3 domain and dif is set to the l3mdev
	 */
	l3index = sdif ? dif : 0;

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest), l3index);
		return true;
	}
#endif
	return false;
}

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req)
{
	tcp_v6_init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
	if (rst && !key) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		buff->tstamp = tcp_transmit_time(sk);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup either it is for RST
	 * Underlying function will use this to retrieve the network
	 * namespace
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not losing security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					   &tcp_hashinfo, NULL, 0,
					   &ipv6h->saddr,
					   th->source, &ipv6h->daddr,
					   ntohs(th->source), dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
			     ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	int l3index;

	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}


static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newinet = inet_sk(newsk);
		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;


	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more thing there: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							   u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, np->rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
L
Linus Torvalds 已提交
1620
{
E
Eric Dumazet 已提交
1621
	struct sk_buff *skb_to_free;
1622
	int sdif = inet6_sdif(skb);
1623
	int dif = inet6_iif(skb);
1624
	const struct tcphdr *th;
1625
	const struct ipv6hdr *hdr;
1626
	bool refcounted;
L
Linus Torvalds 已提交
1627 1628
	struct sock *sk;
	int ret;
1629
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
1630 1631 1632 1633 1634 1635 1636

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
E
Eric Dumazet 已提交
1637
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
L
Linus Torvalds 已提交
1638 1639 1640 1641

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

1642
	th = (const struct tcphdr *)skb->data;
L
Linus Torvalds 已提交
1643

1644
	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
L
Linus Torvalds 已提交
1645 1646 1647 1648
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

1649
	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1650
		goto csum_error;
L
Linus Torvalds 已提交
1651

1652
	th = (const struct tcphdr *)skb->data;
1653
	hdr = ipv6_hdr(skb);
L
Linus Torvalds 已提交
1654

1655
lookup:
1656
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1657
				th->source, th->dest, inet6_iif(skb), sdif,
1658
				&refcounted);
L
Linus Torvalds 已提交
1659 1660 1661 1662 1663 1664 1665
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
		goto discard_and_relse;

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

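	/* Deliver to the socket directly while it is not owned by a process
	 * context; otherwise queue the segment on the socket backlog.
	 */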
	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		skb_to_free = sk->sk_rx_skb_cache;
		sk->sk_rx_skb_cache = NULL;
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb))
			goto discard_and_relse;
		skb_to_free = NULL;
	}
	bh_unlock_sock(sk);
	if (skb_to_free)
		__kfree_skb(skb_to_free);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

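	/* No socket matched: count checksum failures, otherwise answer the
	 * segment with a reset.
	 */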
	if (tcp_checksum_complete(skb)) {
csum_error:
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

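	/* Let the TIME_WAIT code classify the segment: it may allow a new
	 * connection via a listener (TCP_TW_SYN), require an ACK or a RST,
	 * or simply be ignored.
	 */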
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

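/* Opportunistically look up an established socket before the main receive
 * path runs, so the skb can reuse the socket and its cached route.
 */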
INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
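		/* For full sockets, also reuse the cached receive route when it
		 * is still valid for the incoming interface.
		 */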
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
}

const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

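/* Release the IPv4/TCP side of the socket state and then the IPv6-specific
 * per-socket state when a TCPv6 socket is destroyed.
 */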
static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

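/* /proc/net/tcp6 reuses the generic TCP iterator; only the show method
 * is IPv6-specific.
 */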
static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

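/* Socket-level operations for IPv6 TCP sockets: most handlers are the
 * address-family independent TCP ones, with IPv6-specific init, destroy
 * and receive hooks.
 */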
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux	=	tcp_v6_early_demux,
	.early_demux_handler =  tcp_v6_early_demux,
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

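/* Each network namespace gets a control socket, used to transmit RSTs and
 * ACKs that are not tied to a full socket.
 */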
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

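/* Register the IPv6 TCP protocol handler, the socket interface, the
 * per-netns hooks and MPTCP support, unwinding in reverse order on error.
 */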
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}