tcp_ipv6.c 58.5 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-or-later
L
Linus Torvalds 已提交
2 3
/*
 *	TCP over IPv6
4
 *	Linux INET6 implementation
L
Linus Torvalds 已提交
5 6
 *
 *	Authors:
7
 *	Pedro Roque		<roque@di.fc.ul.pt>
L
Linus Torvalds 已提交
8
 *
9
 *	Based on:
L
Linus Torvalds 已提交
10 11 12 13 14 15 16 17 18 19 20 21
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

H
Herbert Xu 已提交
22
#include <linux/bottom_half.h>
L
Linus Torvalds 已提交
23 24 25 26 27 28 29 30 31 32 33 34 35 36
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
37
#include <linux/slab.h>
W
Wang Yufen 已提交
38
#include <linux/uaccess.h>
L
Linus Torvalds 已提交
39 40 41
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
42
#include <linux/indirect_call_wrapper.h>
L
Linus Torvalds 已提交
43 44 45

#include <net/tcp.h>
#include <net/ndisc.h>
46
#include <net/inet6_hashtables.h>
47
#include <net/inet6_connection_sock.h>
L
Linus Torvalds 已提交
48 49 50 51 52 53 54 55 56 57
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
58
#include <net/timewait_sock.h>
59
#include <net/inet_common.h>
60
#include <net/secure_seq.h>
61
#include <net/busy_poll.h>
L
Linus Torvalds 已提交
62 63 64 65

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

H
Herbert Xu 已提交
66
#include <crypto/hash.h>
67 68
#include <linux/scatterlist.h>

69 70
#include <trace/events/tcp.h>

71 72
static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73
				      struct request_sock *req);
L
Linus Torvalds 已提交
74

75
INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
L
Linus Torvalds 已提交
76

77
static const struct inet_connection_sock_af_ops ipv6_mapped;
78
const struct inet_connection_sock_af_ops ipv6_specific;
79
#ifdef CONFIG_TCP_MD5SIG
80 81
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82
#else
83
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 85
						   const struct in6_addr *addr,
						   int l3index)
86 87 88
{
	return NULL;
}
89
#endif
L
Linus Torvalds 已提交
90

E
Eric Dumazet 已提交
91 92 93
/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allow compiler optimizations.
94
 * It is a specialized version of inet6_sk_generic().
E
Eric Dumazet 已提交
95 96 97
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
98
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
E
Eric Dumazet 已提交
99

100
	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
E
Eric Dumazet 已提交
101 102
}

103 104 105 106
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

E
Eric Dumazet 已提交
107
	if (dst && dst_hold_safe(dst)) {
108 109
		const struct rt6_info *rt = (const struct rt6_info *)dst;

110
		rcu_assign_pointer(sk->sk_rx_dst, dst);
111
		sk->sk_rx_dst_ifindex = skb->skb_iif;
112
		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113
	}
114 115
}

116
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
L
Linus Torvalds 已提交
117
{
118 119 120 121 122 123
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

124
static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125
{
126
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127
				   ipv6_hdr(skb)->saddr.s6_addr32);
L
Linus Torvalds 已提交
128 129
}

A
Andrey Ignatov 已提交
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

145
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
L
Linus Torvalds 已提交
146 147 148
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149
	struct inet_connection_sock *icsk = inet_csk(sk);
150
	struct in6_addr *saddr = NULL, *final_p, final;
151
	struct inet_timewait_death_row *tcp_death_row;
E
Eric Dumazet 已提交
152
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
153
	struct inet_sock *inet = inet_sk(sk);
L
Linus Torvalds 已提交
154
	struct tcp_sock *tp = tcp_sk(sk);
155
	struct net *net = sock_net(sk);
156
	struct ipv6_txoptions *opt;
L
Linus Torvalds 已提交
157
	struct dst_entry *dst;
158
	struct flowi6 fl6;
L
Linus Torvalds 已提交
159 160 161
	int addr_type;
	int err;

162
	if (addr_len < SIN6_LEN_RFC2133)
L
Linus Torvalds 已提交
163 164
		return -EINVAL;

165
	if (usin->sin6_family != AF_INET6)
E
Eric Dumazet 已提交
166
		return -EAFNOSUPPORT;
L
Linus Torvalds 已提交
167

168
	memset(&fl6, 0, sizeof(fl6));
L
Linus Torvalds 已提交
169 170

	if (np->sndflow) {
171 172 173
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
L
Linus Torvalds 已提交
174
			struct ip6_flowlabel *flowlabel;
175
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
176
			if (IS_ERR(flowlabel))
L
Linus Torvalds 已提交
177 178 179 180 181 182
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
183 184 185
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

186 187 188 189 190 191 192
	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}
L
Linus Torvalds 已提交
193 194 195

	addr_type = ipv6_addr_type(&usin->sin6_addr);

W
Weilong Chen 已提交
196
	if (addr_type & IPV6_ADDR_MULTICAST)
L
Linus Torvalds 已提交
197 198 199 200 201 202 203 204
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
205
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
L
Linus Torvalds 已提交
206 207 208 209 210 211 212 213 214 215 216
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
217
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
L
Linus Torvalds 已提交
218 219
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
220
		WRITE_ONCE(tp->write_seq, 0);
L
Linus Torvalds 已提交
221 222
	}

223
	sk->sk_v6_daddr = usin->sin6_addr;
224
	np->flow_label = fl6.flowlabel;
L
Linus Torvalds 已提交
225 226 227 228 229

	/*
	 *	TCP over IPv4
	 */

230
	if (addr_type & IPV6_ADDR_MAPPED) {
231
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
L
Linus Torvalds 已提交
232 233
		struct sockaddr_in sin;

234
		if (ipv6_only_sock(sk))
L
Linus Torvalds 已提交
235 236 237 238 239 240
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

241
		icsk->icsk_af_ops = &ipv6_mapped;
242
		if (sk_is_mptcp(sk))
243
			mptcpv6_handle_mapped(sk, true);
L
Linus Torvalds 已提交
244
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
245 246 247
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif
L
Linus Torvalds 已提交
248 249 250 251

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
252 253
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
254
			if (sk_is_mptcp(sk))
255
				mptcpv6_handle_mapped(sk, false);
L
Linus Torvalds 已提交
256
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
257 258 259
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
L
Linus Torvalds 已提交
260 261
			goto failure;
		}
262
		np->saddr = sk->sk_v6_rcv_saddr;
L
Linus Torvalds 已提交
263 264 265 266

		return err;
	}

267 268
	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;
L
Linus Torvalds 已提交
269

270
	fl6.flowi6_proto = IPPROTO_TCP;
271
	fl6.daddr = sk->sk_v6_daddr;
A
Alexey Dobriyan 已提交
272
	fl6.saddr = saddr ? *saddr : np->saddr;
273 274
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
275 276
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
277
	fl6.flowi6_uid = sk->sk_uid;
L
Linus Torvalds 已提交
278

279
	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
280
	final_p = fl6_update_dst(&fl6, opt, &final);
L
Linus Torvalds 已提交
281

282
	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
V
Venkat Yekkirala 已提交
283

284
	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
285 286
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
L
Linus Torvalds 已提交
287
		goto failure;
288
	}
L
Linus Torvalds 已提交
289

290
	if (!saddr) {
291 292 293 294 295
		struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
		struct in6_addr prev_v6_rcv_saddr;

		if (icsk->icsk_bind2_hash) {
			prev_addr_hashbucket = inet_bhashfn_portaddr(&tcp_hashinfo,
296
								     sk, net, inet->inet_num);
297 298
			prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
		}
299
		saddr = &fl6.saddr;
300
		sk->sk_v6_rcv_saddr = *saddr;
301 302 303 304 305 306 307 308

		if (prev_addr_hashbucket) {
			err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
			if (err) {
				sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
				goto failure;
			}
		}
L
Linus Torvalds 已提交
309 310 311
	}

	/* set the source address */
A
Alexey Dobriyan 已提交
312
	np->saddr = *saddr;
E
Eric Dumazet 已提交
313
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
L
Linus Torvalds 已提交
314

H
Herbert Xu 已提交
315
	sk->sk_gso_type = SKB_GSO_TCPV6;
E
Eric Dumazet 已提交
316
	ip6_dst_store(sk, dst, NULL, NULL);
L
Linus Torvalds 已提交
317

318
	icsk->icsk_ext_hdr_len = 0;
319 320 321
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;
L
Linus Torvalds 已提交
322 323 324

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

E
Eric Dumazet 已提交
325
	inet->inet_dport = usin->sin6_port;
L
Linus Torvalds 已提交
326 327

	tcp_set_state(sk, TCP_SYN_SENT);
328
	tcp_death_row = net->ipv4.tcp_death_row;
329
	err = inet6_hash_connect(tcp_death_row, sk);
L
Linus Torvalds 已提交
330 331 332
	if (err)
		goto late_failure;

333
	sk_set_txhash(sk);
334

335 336
	if (likely(!tp->repair)) {
		if (!tp->write_seq)
337 338 339 340 341
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
342
		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
343
						   sk->sk_v6_daddr.s6_addr32);
344
	}
L
Linus Torvalds 已提交
345

W
Wei Wang 已提交
346 347 348 349 350
	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

L
Linus Torvalds 已提交
351 352 353 354 355 356 357 358 359
	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
E
Eric Dumazet 已提交
360
	inet->inet_dport = 0;
L
Linus Torvalds 已提交
361 362 363 364
	sk->sk_route_caps = 0;
	return err;
}

365 366 367
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
368
	u32 mtu;
369 370 371 372

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

373 374 375 376 377 378 379 380 381
	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
382 383 384 385 386 387 388 389 390
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

391
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
392
		u8 type, u8 code, int offset, __be32 info)
L
Linus Torvalds 已提交
393
{
W
Weilong Chen 已提交
394
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
395
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
396 397
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
L
Linus Torvalds 已提交
398
	struct ipv6_pinfo *np;
399
	struct tcp_sock *tp;
400
	__u32 seq, snd_una;
401
	struct sock *sk;
402
	bool fatal;
403
	int err;
L
Linus Torvalds 已提交
404

405 406 407
	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
408
					skb->dev->ifindex, inet6_sdif(skb));
L
Linus Torvalds 已提交
409

410
	if (!sk) {
E
Eric Dumazet 已提交
411 412
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
413
		return -ENOENT;
L
Linus Torvalds 已提交
414 415 416
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
417
		inet_twsk_put(inet_twsk(sk));
418
		return 0;
L
Linus Torvalds 已提交
419
	}
420
	seq = ntohl(th->seq);
421
	fatal = icmpv6_err_convert(type, code, &err);
422 423 424 425
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}
L
Linus Torvalds 已提交
426 427

	bh_lock_sock(sk);
428
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
429
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
L
Linus Torvalds 已提交
430 431 432 433

	if (sk->sk_state == TCP_CLOSE)
		goto out;

434 435 436 437 438 439
	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
440 441
	}

L
Linus Torvalds 已提交
442
	tp = tcp_sk(sk);
443
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
444
	fastopen = rcu_dereference(tp->fastopen_rsk);
445
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
L
Linus Torvalds 已提交
446
	if (sk->sk_state != TCP_LISTEN &&
447
	    !between(seq, snd_una, tp->snd_nxt)) {
448
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
L
Linus Torvalds 已提交
449 450 451
		goto out;
	}

E
Eric Dumazet 已提交
452
	np = tcp_inet6_sk(sk);
L
Linus Torvalds 已提交
453

454
	if (type == NDISC_REDIRECT) {
455 456
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
457

458 459 460
			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
461
		goto out;
462 463
	}

L
Linus Torvalds 已提交
464
	if (type == ICMPV6_PKT_TOOBIG) {
465 466
		u32 mtu = ntohl(info);

467 468 469 470 471 472 473
		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs send out by Linux are always <576bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

474 475 476
		if (!ip6_sk_accept_pmtu(sk))
			goto out;

477 478 479 480 481
		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

482 483
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
484
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
485
					   &sk->sk_tsq_flags))
486
			sock_hold(sk);
L
Linus Torvalds 已提交
487 488 489 490
		goto out;
	}


491
	/* Might be for an request_sock */
L
Linus Torvalds 已提交
492 493
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
494 495
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
496
		 * already accepted it is treated as a connected one below.
497
		 */
498
		if (fastopen && !fastopen->sk)
499 500
			break;

501 502
		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

L
Linus Torvalds 已提交
503 504
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
505
			sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
L
Linus Torvalds 已提交
506 507 508 509 510

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
511 512 513 514 515 516 517 518 519
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
L
Linus Torvalds 已提交
520 521 522 523
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
524
		sk_error_report(sk);
L
Linus Torvalds 已提交
525 526 527 528 529 530
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
531
	return 0;
L
Linus Torvalds 已提交
532 533 534
}


535
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
536
			      struct flowi *fl,
537
			      struct request_sock *req,
538
			      struct tcp_fastopen_cookie *foc,
539 540
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
L
Linus Torvalds 已提交
541
{
542
	struct inet_request_sock *ireq = inet_rsk(req);
E
Eric Dumazet 已提交
543
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
544
	struct ipv6_txoptions *opt;
545
	struct flowi6 *fl6 = &fl->u.ip6;
W
Weilong Chen 已提交
546
	struct sk_buff *skb;
547
	int err = -ENOMEM;
548
	u8 tclass;
L
Linus Torvalds 已提交
549

550
	/* First, grab a route. */
551 552
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
553
		goto done;
554

555
	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
556

L
Linus Torvalds 已提交
557
	if (skb) {
558 559
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);
L
Linus Torvalds 已提交
560

561
		fl6->daddr = ireq->ir_v6_rmt_addr;
562
		if (np->repflow && ireq->pktopts)
563 564
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

565
		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
W
Wei Wang 已提交
566 567
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
568
				np->tclass;
569 570 571 572 573 574 575

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
576 577
		if (!opt)
			opt = rcu_dereference(np->opt);
578
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
579
			       tclass, sk->sk_priority);
580
		rcu_read_unlock();
581
		err = net_xmit_eval(err);
L
Linus Torvalds 已提交
582 583 584 585 586 587
	}

done:
	return err;
}

588

589
static void tcp_v6_reqsk_destructor(struct request_sock *req)
L
Linus Torvalds 已提交
590
{
591
	kfree(inet_rsk(req)->ipv6_opt);
592
	consume_skb(inet_rsk(req)->pktopts);
L
Linus Torvalds 已提交
593 594
}

595
#ifdef CONFIG_TCP_MD5SIG
596
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
597 598
						   const struct in6_addr *addr,
						   int l3index)
599
{
600 601
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
602 603
}

604
static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
605
						const struct sock *addr_sk)
606
{
607 608 609 610 611 612
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
613 614
}

615
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
616
				 sockptr_t optval, int optlen)
617 618 619
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
620
	int l3index = 0;
621
	u8 prefixlen;
622
	u8 flags;
623 624 625 626

	if (optlen < sizeof(cmd))
		return -EINVAL;

627
	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
628 629 630 631 632
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

633 634
	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

635 636 637 638 639 640 641 642 643 644
	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

645
	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

662
	if (!cmd.tcpm_keylen) {
B
Brian Haley 已提交
663
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
E
Eric Dumazet 已提交
664
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
665
					      AF_INET, prefixlen,
666
					      l3index, flags);
E
Eric Dumazet 已提交
667
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
668
				      AF_INET6, prefixlen, l3index, flags);
669 670 671 672 673
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

E
Eric Dumazet 已提交
674 675
	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
676
				      AF_INET, prefixlen, l3index, flags,
677 678
				      cmd.tcpm_key, cmd.tcpm_keylen,
				      GFP_KERNEL);
679

E
Eric Dumazet 已提交
680
	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
681
			      AF_INET6, prefixlen, l3index, flags,
682
			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
683 684
}

685 686 687 688
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
689 690
{
	struct tcp6_pseudohdr *bp;
691
	struct scatterlist sg;
692
	struct tcphdr *_th;
693

694
	bp = hp->scratch;
695
	/* 1. TCP pseudo-header (RFC2460) */
A
Alexey Dobriyan 已提交
696 697
	bp->saddr = *saddr;
	bp->daddr = *daddr;
698
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
A
Adam Langley 已提交
699
	bp->len = cpu_to_be32(nbytes);
700

701 702 703 704 705 706 707
	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
H
Herbert Xu 已提交
708
	return crypto_ahash_update(hp->md5_req);
709
}
710

711
static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
712
			       const struct in6_addr *daddr, struct in6_addr *saddr,
E
Eric Dumazet 已提交
713
			       const struct tcphdr *th)
714 715
{
	struct tcp_md5sig_pool *hp;
H
Herbert Xu 已提交
716
	struct ahash_request *req;
717 718 719 720

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
H
Herbert Xu 已提交
721
	req = hp->md5_req;
722

H
Herbert Xu 已提交
723
	if (crypto_ahash_init(req))
724
		goto clear_hash;
725
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
726 727 728
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
H
Herbert Xu 已提交
729 730
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
731 732 733 734
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;
735

736 737 738 739
clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
740
	return 1;
741 742
}

743 744
static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
E
Eric Dumazet 已提交
745 746
			       const struct sock *sk,
			       const struct sk_buff *skb)
747
{
748
	const struct in6_addr *saddr, *daddr;
749
	struct tcp_md5sig_pool *hp;
H
Herbert Xu 已提交
750
	struct ahash_request *req;
E
Eric Dumazet 已提交
751
	const struct tcphdr *th = tcp_hdr(skb);
752

753 754
	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
755
		daddr = &sk->sk_v6_daddr;
756
	} else {
757
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
758 759
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
760
	}
761 762 763 764

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
H
Herbert Xu 已提交
765
	req = hp->md5_req;
766

H
Herbert Xu 已提交
767
	if (crypto_ahash_init(req))
768 769
		goto clear_hash;

770
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
771 772 773 774 775
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
H
Herbert Xu 已提交
776 777
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
778 779 780 781 782 783 784 785 786 787
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
788 789
}

790 791
#endif

792 793
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
794 795
			    struct sk_buff *skb)
{
796
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
797
	struct inet_request_sock *ireq = inet_rsk(req);
E
Eric Dumazet 已提交
798
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
799 800 801 802 803

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
804
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
805
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
E
Eric Dumazet 已提交
806
		ireq->ir_iif = tcp_v6_iif(skb);
807

808
	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
809
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
810
	     np->rxopt.bits.rxinfo ||
811 812
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
813
		refcount_inc(&skb->users);
814 815 816 817
		ireq->pktopts = skb;
	}
}

818
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
819
					  struct sk_buff *skb,
820
					  struct flowi *fl,
821
					  struct request_sock *req)
822
{
823 824 825 826 827
	tcp_v6_init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

828
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
829 830
}

831
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
L
Linus Torvalds 已提交
832
	.family		=	AF_INET6,
833
	.obj_size	=	sizeof(struct tcp6_request_sock),
834
	.rtx_syn_ack	=	tcp_rtx_synack,
835 836
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
837
	.send_reset	=	tcp_v6_send_reset,
W
Wang Yufen 已提交
838
	.syn_ack_timeout =	tcp_syn_ack_timeout,
L
Linus Torvalds 已提交
839 840
};

841
const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
842 843
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
844
#ifdef CONFIG_TCP_MD5SIG
845
	.req_md5_lookup	=	tcp_v6_md5_lookup,
846
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
847
#endif
848 849 850
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
851
	.route_req	=	tcp_v6_route_req,
852 853
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
854
	.send_synack	=	tcp_v6_send_synack,
855
};
856

857
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
858 859
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
860
				 u8 tclass, __be32 label, u32 priority, u32 txhash)
L
Linus Torvalds 已提交
861
{
862 863
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
L
Linus Torvalds 已提交
864
	struct sk_buff *buff;
865
	struct flowi6 fl6;
866
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
867
	struct sock *ctl_sk = net->ipv6.tcp_sk;
868
	unsigned int tot_len = sizeof(struct tcphdr);
869
	__be32 mrst = 0, *topt;
E
Eric Dumazet 已提交
870
	struct dst_entry *dst;
J
Jon Maxwell 已提交
871
	__u32 mark = 0;
L
Linus Torvalds 已提交
872

873
	if (tsecr)
874
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
875 876 877 878 879
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

880 881 882 883 884 885 886 887 888
#ifdef CONFIG_MPTCP
	if (rst && !key) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

889
	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
890
	if (!buff)
891
		return;
L
Linus Torvalds 已提交
892

893
	skb_reserve(buff, MAX_TCP_HEADER);
L
Linus Torvalds 已提交
894

895
	t1 = skb_push(buff, tot_len);
896
	skb_reset_transport_header(buff);
L
Linus Torvalds 已提交
897 898 899 900 901

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
902
	t1->doff = tot_len / 4;
903 904 905 906 907
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);
L
Linus Torvalds 已提交
908

909 910
	topt = (__be32 *)(t1 + 1);

911
	if (tsecr) {
912 913
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
914 915
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
916 917
	}

918 919 920
	if (mrst)
		*topt++ = mrst;

921 922
#ifdef CONFIG_TCP_MD5SIG
	if (key) {
923 924 925
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
926 927
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
928 929 930
	}
#endif

931
	memset(&fl6, 0, sizeof(fl6));
A
Alexey Dobriyan 已提交
932 933
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
934
	fl6.flowlabel = label;
L
Linus Torvalds 已提交
935

936 937
	buff->ip_summed = CHECKSUM_PARTIAL;

938
	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
L
Linus Torvalds 已提交
939

940
	fl6.flowi6_proto = IPPROTO_TCP;
941
	if (rt6_need_strict(&fl6.daddr) && !oif)
E
Eric Dumazet 已提交
942
		fl6.flowi6_oif = tcp_v6_iif(skb);
943 944 945 946 947 948
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}
949

950
	if (sk) {
951
		if (sk->sk_state == TCP_TIME_WAIT)
952
			mark = inet_twsk(sk)->tw_mark;
953
		else
954
			mark = sk->sk_mark;
955
		skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
956
	}
957 958 959 960
	if (txhash) {
		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
	}
J
Jon Maxwell 已提交
961
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
962 963
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
964
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
965
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
L
Linus Torvalds 已提交
966

967 968 969 970
	/* Pass a socket to ip6_dst_lookup either it is for RST
	 * Underlying function will use this to retrieve the network
	 * namespace
	 */
971 972 973 974
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
975 976
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
977 978
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
979
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
980
		if (rst)
981
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
982
		return;
L
Linus Torvalds 已提交
983 984 985 986 987
	}

	kfree_skb(buff);
}

988
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
L
Linus Torvalds 已提交
989
{
990
	const struct tcphdr *th = tcp_hdr(skb);
991
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
992
	u32 seq = 0, ack_seq = 0;
993
	struct tcp_md5sig_key *key = NULL;
994 995 996 997 998 999
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
1000
	__be32 label = 0;
1001
	u32 priority = 0;
1002
	struct net *net;
1003
	int oif = 0;
L
Linus Torvalds 已提交
1004

1005
	if (th->rst)
L
Linus Torvalds 已提交
1006 1007
		return;

1008 1009 1010 1011
	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
1012
		return;
L
Linus Torvalds 已提交
1013

1014
	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1015
#ifdef CONFIG_TCP_MD5SIG
1016
	rcu_read_lock();
1017
	hash_location = tcp_parse_md5sig_option(th);
1018
	if (sk && sk_fullsock(sk)) {
1019 1020 1021 1022 1023 1024 1025
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1026
	} else if (hash_location) {
1027 1028
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
1029
		int l3index;
1030

1031 1032 1033 1034 1035 1036 1037
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
1038
		sk1 = inet6_lookup_listener(net,
1039 1040
					   &tcp_hashinfo, NULL, 0,
					   &ipv6h->saddr,
1041
					   th->source, &ipv6h->daddr,
1042
					   ntohs(th->source), dif, sdif);
1043
		if (!sk1)
1044
			goto out;
1045

1046 1047 1048 1049 1050 1051
		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1052
		if (!key)
1053
			goto out;
1054

1055
		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1056
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1057
			goto out;
1058
	}
1059 1060
#endif

1061 1062 1063 1064 1065
	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);
L
Linus Torvalds 已提交
1066

1067 1068
	if (sk) {
		oif = sk->sk_bound_dev_if;
1069 1070 1071
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

1072
			trace_tcp_send_reset(sk, skb);
1073 1074
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
1075
			priority = sk->sk_priority;
1076
		}
1077
		if (sk->sk_state == TCP_TIME_WAIT) {
1078
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1079 1080
			priority = inet_twsk(sk)->tw_priority;
		}
1081
	} else {
1082
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1083
			label = ip6_flowlabel(ipv6h);
1084 1085
	}

1086
	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1087
			     ipv6_get_dsfield(ipv6h), label, priority, 0);
1088 1089

#ifdef CONFIG_TCP_MD5SIG
1090 1091
out:
	rcu_read_unlock();
1092
#endif
1093
}
L
Linus Torvalds 已提交
1094

1095
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1096
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1097
			    struct tcp_md5sig_key *key, u8 tclass,
1098
			    __be32 label, u32 priority, u32 txhash)
1099
{
1100
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1101
			     tclass, label, priority, txhash);
L
Linus Torvalds 已提交
1102 1103 1104 1105
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
1106
	struct inet_timewait_sock *tw = inet_twsk(sk);
1107
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
L
Linus Torvalds 已提交
1108

1109
	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1110
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1111
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
W
Wang Yufen 已提交
1112
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1113 1114
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
			tw->tw_txhash);
L
Linus Torvalds 已提交
1115

1116
	inet_twsk_put(tw);
L
Linus Torvalds 已提交
1117 1118
}

1119
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1120
				  struct request_sock *req)
L
Linus Torvalds 已提交
1121
{
1122 1123 1124 1125
	int l3index;

	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

1126 1127 1128
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
1129 1130 1131 1132 1133
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
1134
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1135
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1136 1137
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1138
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1139
			req->ts_recent, sk->sk_bound_dev_if,
1140
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1141 1142
			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
			tcp_rsk(req)->txhash);
L
Linus Torvalds 已提交
1143 1144 1145
}


1146
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
L
Linus Torvalds 已提交
1147
{
1148
#ifdef CONFIG_SYN_COOKIES
1149
	const struct tcphdr *th = tcp_hdr(skb);
L
Linus Torvalds 已提交
1150

1151
	if (!th->syn)
1152
		sk = cookie_v6_check(sk, skb);
L
Linus Torvalds 已提交
1153 1154 1155 1156
#endif
	return sk;
}

1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

L
Linus Torvalds 已提交
1172 1173 1174 1175 1176 1177
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
1178
		goto drop;
L
Linus Torvalds 已提交
1179

1180 1181 1182 1183 1184
	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

O
Octavian Purdila 已提交
1185 1186
	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);
L
Linus Torvalds 已提交
1187 1188

drop:
1189
	tcp_listendrop(sk);
L
Linus Torvalds 已提交
1190 1191 1192
	return 0; /* don't send reset */
}

1193 1194 1195 1196 1197 1198 1199 1200 1201 1202
static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

1203
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
W
Weilong Chen 已提交
1204
					 struct request_sock *req,
1205 1206 1207
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
L
Linus Torvalds 已提交
1208
{
1209
	struct inet_request_sock *ireq;
1210
	struct ipv6_pinfo *newnp;
E
Eric Dumazet 已提交
1211
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1212
	struct ipv6_txoptions *opt;
L
Linus Torvalds 已提交
1213
	struct inet_sock *newinet;
1214
	bool found_dup_sk = false;
L
Linus Torvalds 已提交
1215 1216
	struct tcp_sock *newtp;
	struct sock *newsk;
1217 1218
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
1219
	int l3index;
1220
#endif
1221
	struct flowi6 fl6;
L
Linus Torvalds 已提交
1222 1223 1224 1225 1226 1227

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

1228 1229
		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);
L
Linus Torvalds 已提交
1230

1231
		if (!newsk)
L
Linus Torvalds 已提交
1232 1233
			return NULL;

E
Eric Dumazet 已提交
1234
		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
L
Linus Torvalds 已提交
1235

E
Eric Dumazet 已提交
1236
		newnp = tcp_inet6_sk(newsk);
L
Linus Torvalds 已提交
1237 1238 1239 1240
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

1241
		newnp->saddr = newsk->sk_v6_rcv_saddr;
L
Linus Torvalds 已提交
1242

1243
		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1244
		if (sk_is_mptcp(newsk))
1245
			mptcpv6_handle_mapped(newsk, true);
L
Linus Torvalds 已提交
1246
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1247 1248 1249 1250
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

1251
		newnp->ipv6_mc_list = NULL;
1252 1253
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
L
Linus Torvalds 已提交
1254 1255
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
1256 1257 1258
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
1259
		if (np->repflow)
1260
			newnp->flow_label = 0;
L
Linus Torvalds 已提交
1261

1262 1263 1264 1265
		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
L
Linus Torvalds 已提交
1266 1267 1268
		 */

		/* It is tricky place. Until this moment IPv4 tcp
1269
		   worked with IPv6 icsk.icsk_af_ops.
L
Linus Torvalds 已提交
1270 1271
		   Sync it now.
		 */
1272
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
L
Linus Torvalds 已提交
1273 1274 1275 1276

		return newsk;
	}

1277
	ireq = inet_rsk(req);
L
Linus Torvalds 已提交
1278 1279 1280 1281

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

1282
	if (!dst) {
1283
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1284
		if (!dst)
L
Linus Torvalds 已提交
1285
			goto out;
1286
	}
L
Linus Torvalds 已提交
1287 1288

	newsk = tcp_create_openreq_child(sk, req, skb);
1289
	if (!newsk)
1290
		goto out_nonewsk;
L
Linus Torvalds 已提交
1291

1292 1293 1294 1295 1296
	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */
L
Linus Torvalds 已提交
1297

1298
	newsk->sk_gso_type = SKB_GSO_TCPV6;
E
Eric Dumazet 已提交
1299
	ip6_dst_store(newsk, dst, NULL, NULL);
1300
	inet6_sk_rx_dst_set(newsk, skb);
L
Linus Torvalds 已提交
1301

E
Eric Dumazet 已提交
1302
	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
L
Linus Torvalds 已提交
1303 1304 1305

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
E
Eric Dumazet 已提交
1306
	newnp = tcp_inet6_sk(newsk);
L
Linus Torvalds 已提交
1307 1308 1309

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

1310 1311 1312 1313
	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;
L
Linus Torvalds 已提交
1314

1315
	/* Now IPv6 options...
L
Linus Torvalds 已提交
1316 1317 1318

	   First: no IPv4 options.
	 */
1319
	newinet->inet_opt = NULL;
1320
	newnp->ipv6_mc_list = NULL;
1321
	newnp->ipv6_ac_list = NULL;
1322
	newnp->ipv6_fl_list = NULL;
L
Linus Torvalds 已提交
1323 1324 1325 1326 1327 1328

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
E
Eric Dumazet 已提交
1329
	newnp->mcast_oif  = tcp_v6_iif(skb);
1330
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1331
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1332 1333
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
L
Linus Torvalds 已提交
1334

W
Wei Wang 已提交
1335 1336 1337
	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
1338
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1339 1340
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

L
Linus Torvalds 已提交
1341 1342 1343 1344 1345 1346
	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
1347 1348 1349
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
1350 1351 1352 1353
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
1354
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1355 1356 1357
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;
L
Linus Torvalds 已提交
1358

1359 1360
	tcp_ca_openreq_child(newsk, dst);

L
Linus Torvalds 已提交
1361
	tcp_sync_mss(newsk, dst_mtu(dst));
E
Eric Dumazet 已提交
1362
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1363

L
Linus Torvalds 已提交
1364 1365
	tcp_initialize_rcv_mss(newsk);

E
Eric Dumazet 已提交
1366 1367
	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
L
Linus Torvalds 已提交
1368

1369
#ifdef CONFIG_TCP_MD5SIG
1370 1371
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

1372
	/* Copy over the MD5 key from the original socket */
1373
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1374
	if (key) {
1375 1376 1377 1378 1379
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
1380
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1381
			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1382
			       sk_gfp_mask(sk, GFP_ATOMIC));
1383 1384 1385
	}
#endif

1386
	if (__inet_inherit_port(sk, newsk) < 0) {
1387 1388
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
1389 1390
		goto out;
	}
1391 1392
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
E
Eric Dumazet 已提交
1393
	if (*own_req) {
1394
		tcp_move_syn(newtp, req);
E
Eric Dumazet 已提交
1395 1396 1397 1398

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
1399
						      sk_gfp_mask(sk, GFP_ATOMIC));
E
Eric Dumazet 已提交
1400 1401
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
1402 1403
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
E
Eric Dumazet 已提交
1404
				skb_set_owner_r(newnp->pktoptions, newsk);
1405
			}
E
Eric Dumazet 已提交
1406
		}
1407 1408 1409 1410 1411 1412 1413 1414 1415
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case only
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
E
Eric Dumazet 已提交
1416
	}
L
Linus Torvalds 已提交
1417 1418 1419 1420

	return newsk;

out_overflow:
1421
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1422
out_nonewsk:
L
Linus Torvalds 已提交
1423
	dst_release(dst);
1424
out:
1425
	tcp_listendrop(sk);
L
Linus Torvalds 已提交
1426 1427 1428
	return NULL;
}

1429 1430
INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							   u32));
L
Linus Torvalds 已提交
1431
/* The socket must have it's spinlock held when we get
1432
 * here, unless it is a TCP_LISTEN socket.
L
Linus Torvalds 已提交
1433 1434 1435 1436 1437 1438
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
1439 1440
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
L
Linus Torvalds 已提交
1441
{
E
Eric Dumazet 已提交
1442
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
L
Linus Torvalds 已提交
1443
	struct sk_buff *opt_skb = NULL;
1444
	enum skb_drop_reason reason;
E
Eric Dumazet 已提交
1445
	struct tcp_sock *tp;
L
Linus Torvalds 已提交
1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
1474
					       --ANK (980728)
L
Linus Torvalds 已提交
1475 1476
	 */
	if (np->rxopt.all)
1477
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
L
Linus Torvalds 已提交
1478

1479
	reason = SKB_DROP_REASON_NOT_SPECIFIED;
L
Linus Torvalds 已提交
1480
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1481 1482 1483 1484
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));
E
Eric Dumazet 已提交
1485

1486
		sock_rps_save_rxhash(sk, skb);
1487
		sk_mark_napi_id(sk, skb);
E
Eric Dumazet 已提交
1488
		if (dst) {
1489
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1490
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1491
					    dst, sk->sk_rx_dst_cookie) == NULL) {
1492
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
E
Eric Dumazet 已提交
1493 1494 1495 1496
				dst_release(dst);
			}
		}

1497
		tcp_rcv_established(sk, skb);
L
Linus Torvalds 已提交
1498 1499 1500 1501 1502
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

E
Eric Dumazet 已提交
1503
	if (tcp_checksum_complete(skb))
L
Linus Torvalds 已提交
1504 1505
		goto csum_err;

1506
	if (sk->sk_state == TCP_LISTEN) {
1507 1508
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

L
Linus Torvalds 已提交
1509 1510 1511
		if (!nsk)
			goto discard;

W
Weilong Chen 已提交
1512
		if (nsk != sk) {
L
Linus Torvalds 已提交
1513 1514 1515 1516 1517 1518
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
1519
	} else
1520
		sock_rps_save_rxhash(sk, skb);
L
Linus Torvalds 已提交
1521

1522
	if (tcp_rcv_state_process(sk, skb))
L
Linus Torvalds 已提交
1523 1524 1525 1526 1527 1528
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
1529
	tcp_v6_send_reset(sk, skb);
L
Linus Torvalds 已提交
1530 1531 1532
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
1533
	kfree_skb_reason(skb, reason);
L
Linus Torvalds 已提交
1534 1535
	return 0;
csum_err:
1536
	reason = SKB_DROP_REASON_TCP_CSUM;
1537
	trace_tcp_bad_csum(skb);
1538 1539
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
L
Linus Torvalds 已提交
1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1554
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
E
Eric Dumazet 已提交
1555
			np->mcast_oif = tcp_v6_iif(opt_skb);
1556
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1557
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1558
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1559
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1560 1561
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1562
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
L
Linus Torvalds 已提交
1563
			skb_set_owner_r(opt_skb, sk);
1564
			tcp_v6_restore_cb(opt_skb);
L
Linus Torvalds 已提交
1565 1566 1567 1568 1569 1570 1571
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

1572
	consume_skb(opt_skb);
L
Linus Torvalds 已提交
1573 1574 1575
	return 0;
}

1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
1596 1597
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1598 1599
}

1600
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
L
Linus Torvalds 已提交
1601
{
1602
	enum skb_drop_reason drop_reason;
1603
	int sdif = inet6_sdif(skb);
1604
	int dif = inet6_iif(skb);
1605
	const struct tcphdr *th;
1606
	const struct ipv6hdr *hdr;
1607
	bool refcounted;
L
Linus Torvalds 已提交
1608 1609
	struct sock *sk;
	int ret;
1610
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
1611

1612
	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
L
Linus Torvalds 已提交
1613 1614 1615 1616 1617 1618
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
E
Eric Dumazet 已提交
1619
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
L
Linus Torvalds 已提交
1620 1621 1622 1623

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

1624
	th = (const struct tcphdr *)skb->data;
L
Linus Torvalds 已提交
1625

1626 1627
	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
L
Linus Torvalds 已提交
1628
		goto bad_packet;
1629
	}
L
Linus Torvalds 已提交
1630 1631 1632
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

1633
	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1634
		goto csum_error;
L
Linus Torvalds 已提交
1635

1636
	th = (const struct tcphdr *)skb->data;
1637
	hdr = ipv6_hdr(skb);
L
Linus Torvalds 已提交
1638

1639
lookup:
1640
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1641
				th->source, th->dest, inet6_iif(skb), sdif,
1642
				&refcounted);
L
Linus Torvalds 已提交
1643 1644 1645 1646 1647 1648 1649
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		drop_reason = tcp_inbound_md5_hash(sk, skb,
						   &hdr->saddr, &hdr->daddr,
						   AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		} else {
			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}

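	/* From here on sk is a listening or full socket: apply the
	 * per-socket admission checks (minimum hop limit, IPsec policy,
	 * MD5 signature, socket filter) before handing the segment over.
	 */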
	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
					   AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	if (tcp_filter(sk, skb)) {
		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		goto discard_and_relse;
	}
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

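	/* For non-listening sockets, take the socket spinlock; if the
	 * socket is owned by a process (sock_owned_by_user()), the segment
	 * is queued to the backlog and processed later when the lock is
	 * released, otherwise it is handled right here.
	 */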
	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	kfree_skb_reason(skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

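	/* TIME_WAIT handling: depending on tcp_timewait_state_process(),
	 * a valid new SYN may reopen the connection through a fresh
	 * listener lookup, an ACK or RST may be emitted, or the segment
	 * is silently dropped.
	 */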
do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);

			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

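/* Early demultiplexing, invoked before routing: if the segment belongs
 * to an established socket, attach the socket and, when still valid,
 * its cached dst entry so the normal input path can skip a route lookup.
 */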
void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}

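/* AF-specific connection-socket operations for native IPv6 TCP;
 * installed by tcp_v6_init_sock() below.
 */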
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
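/* These ops handle an IPv6 socket connected to an IPv4-mapped address:
 * packets go through the IPv4 output path while the socket keeps its
 * IPv6 API.  They are swapped in for ipv6_specific by the connect and
 * accept paths elsewhere in this file when the peer is IPv4-mapped.
 */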
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

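/* Format one TCPv6 socket (listening or established) as a single
 * /proc/net/tcp6 line: addresses and ports in hex, state, queue sizes,
 * pending timer, uid, inode and a few congestion-related fields.
 */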
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

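/* The proto instance registered for SOCK_STREAM/IPPROTO_TCP sockets in
 * the PF_INET6 family (via tcpv6_protosw below).  Most operations are
 * shared with IPv4 TCP; only the address-family specific hooks differ.
 */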
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

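/* Batched netns exit: purge any remaining IPv6 timewait sockets that
 * belong to the namespaces being dismantled in this batch.
 */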
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

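/* Module init: register the IPPROTO_TCP handler, the SOCK_STREAM
 * protosw entry, the per-netns ops and MPTCP's IPv6 support, unwinding
 * in reverse order on failure.
 */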
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}