tcp_ipv6.c 56.2 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-or-later
L
Linus Torvalds 已提交
2 3
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

H
Herbert Xu 已提交
22
#include <linux/bottom_half.h>
L
Linus Torvalds 已提交
23 24 25 26 27 28 29 30 31 32 33 34 35 36
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
37
#include <linux/slab.h>
W
Wang Yufen 已提交
38
#include <linux/uaccess.h>
L
Linus Torvalds 已提交
39 40 41
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
42
#include <linux/indirect_call_wrapper.h>
L
Linus Torvalds 已提交
43 44 45

#include <net/tcp.h>
#include <net/ndisc.h>
46
#include <net/inet6_hashtables.h>
47
#include <net/inet6_connection_sock.h>
L
Linus Torvalds 已提交
48 49 50 51 52 53 54 55 56 57
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
58
#include <net/timewait_sock.h>
59
#include <net/inet_common.h>
60
#include <net/secure_seq.h>
61
#include <net/busy_poll.h>
L
Linus Torvalds 已提交
62 63 64 65

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

H
Herbert Xu 已提交
66
#include <crypto/hash.h>
67 68
#include <linux/scatterlist.h>

69 70
#include <trace/events/tcp.h>

71 72
static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73
				      struct request_sock *req);
L
Linus Torvalds 已提交
74 75 76

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

77
static const struct inet_connection_sock_af_ops ipv6_mapped;
78
const struct inet_connection_sock_af_ops ipv6_specific;
79
#ifdef CONFIG_TCP_MD5SIG
80 81
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82
#else
83
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 85
						   const struct in6_addr *addr,
						   int l3index)
86 87 88
{
	return NULL;
}
89
#endif
L
Linus Torvalds 已提交
90

E
Eric Dumazet 已提交
91 92 93
/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allow compiler optimizations.
94
 * It is a specialized version of inet6_sk_generic().
E
Eric Dumazet 已提交
95 96 97
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
98
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
E
Eric Dumazet 已提交
99

100
	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
E
Eric Dumazet 已提交
101 102
}

103 104 105 106
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

E
Eric Dumazet 已提交
107
	if (dst && dst_hold_safe(dst)) {
108 109 110 111
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
E
Eric Dumazet 已提交
112
		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113
	}
114 115
}

116
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
L
Linus Torvalds 已提交
117
{
118 119 120 121 122 123
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

124
static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125
{
126
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127
				   ipv6_hdr(skb)->saddr.s6_addr32);
L
Linus Torvalds 已提交
128 129
}

A
Andrey Ignatov 已提交
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
/* Run the cgroup BPF INET6 connect hook before the actual connect.
 *
 * Returns -EINVAL if @addr_len is shorter than SIN6_LEN_RFC2133, otherwise
 * whatever the BPF hook returns.
 */
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* Mirrors the length check done in tcp_v6_connect(): the BPF program
	 * invoked below must not be able to touch bytes beyond the length
	 * the user supplied in addr_len.
	 */
	if (addr_len >= SIN6_LEN_RFC2133) {
		sock_owned_by_me(sk);

		return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
	}

	return -EINVAL;
}

145
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
L
Linus Torvalds 已提交
146 147 148
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149
	struct inet_sock *inet = inet_sk(sk);
150
	struct inet_connection_sock *icsk = inet_csk(sk);
E
Eric Dumazet 已提交
151
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
L
Linus Torvalds 已提交
152
	struct tcp_sock *tp = tcp_sk(sk);
153
	struct in6_addr *saddr = NULL, *final_p, final;
154
	struct ipv6_txoptions *opt;
155
	struct flowi6 fl6;
L
Linus Torvalds 已提交
156 157 158
	struct dst_entry *dst;
	int addr_type;
	int err;
159
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
L
Linus Torvalds 已提交
160

161
	if (addr_len < SIN6_LEN_RFC2133)
L
Linus Torvalds 已提交
162 163
		return -EINVAL;

164
	if (usin->sin6_family != AF_INET6)
E
Eric Dumazet 已提交
165
		return -EAFNOSUPPORT;
L
Linus Torvalds 已提交
166

167
	memset(&fl6, 0, sizeof(fl6));
L
Linus Torvalds 已提交
168 169

	if (np->sndflow) {
170 171 172
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
L
Linus Torvalds 已提交
173
			struct ip6_flowlabel *flowlabel;
174
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175
			if (IS_ERR(flowlabel))
L
Linus Torvalds 已提交
176 177 178 179 180 181
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
182 183 184
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

185 186 187 188 189 190 191
	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}
L
Linus Torvalds 已提交
192 193 194

	addr_type = ipv6_addr_type(&usin->sin6_addr);

W
Weilong Chen 已提交
195
	if (addr_type & IPV6_ADDR_MULTICAST)
L
Linus Torvalds 已提交
196 197 198 199 200 201 202 203
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
204
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
L
Linus Torvalds 已提交
205 206 207 208 209 210 211 212 213 214 215
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
216
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
L
Linus Torvalds 已提交
217 218
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
219
		WRITE_ONCE(tp->write_seq, 0);
L
Linus Torvalds 已提交
220 221
	}

222
	sk->sk_v6_daddr = usin->sin6_addr;
223
	np->flow_label = fl6.flowlabel;
L
Linus Torvalds 已提交
224 225 226 227 228

	/*
	 *	TCP over IPv4
	 */

229
	if (addr_type & IPV6_ADDR_MAPPED) {
230
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
L
Linus Torvalds 已提交
231 232 233 234 235 236 237 238 239
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

240
		icsk->icsk_af_ops = &ipv6_mapped;
241
		if (sk_is_mptcp(sk))
242
			mptcpv6_handle_mapped(sk, true);
L
Linus Torvalds 已提交
243
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 245 246
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif
L
Linus Torvalds 已提交
247 248 249 250

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
251 252
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
253
			if (sk_is_mptcp(sk))
254
				mptcpv6_handle_mapped(sk, false);
L
Linus Torvalds 已提交
255
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 257 258
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
L
Linus Torvalds 已提交
259 260
			goto failure;
		}
261
		np->saddr = sk->sk_v6_rcv_saddr;
L
Linus Torvalds 已提交
262 263 264 265

		return err;
	}

266 267
	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;
L
Linus Torvalds 已提交
268

269
	fl6.flowi6_proto = IPPROTO_TCP;
270
	fl6.daddr = sk->sk_v6_daddr;
A
Alexey Dobriyan 已提交
271
	fl6.saddr = saddr ? *saddr : np->saddr;
272 273
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
274 275
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
276
	fl6.flowi6_uid = sk->sk_uid;
L
Linus Torvalds 已提交
277

278
	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279
	final_p = fl6_update_dst(&fl6, opt, &final);
L
Linus Torvalds 已提交
280

281
	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
V
Venkat Yekkirala 已提交
282

283
	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284 285
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
L
Linus Torvalds 已提交
286
		goto failure;
287
	}
L
Linus Torvalds 已提交
288

289
	if (!saddr) {
290
		saddr = &fl6.saddr;
291
		sk->sk_v6_rcv_saddr = *saddr;
L
Linus Torvalds 已提交
292 293 294
	}

	/* set the source address */
A
Alexey Dobriyan 已提交
295
	np->saddr = *saddr;
E
Eric Dumazet 已提交
296
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
L
Linus Torvalds 已提交
297

H
Herbert Xu 已提交
298
	sk->sk_gso_type = SKB_GSO_TCPV6;
E
Eric Dumazet 已提交
299
	ip6_dst_store(sk, dst, NULL, NULL);
L
Linus Torvalds 已提交
300

301
	icsk->icsk_ext_hdr_len = 0;
302 303 304
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;
L
Linus Torvalds 已提交
305 306 307

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

E
Eric Dumazet 已提交
308
	inet->inet_dport = usin->sin6_port;
L
Linus Torvalds 已提交
309 310

	tcp_set_state(sk, TCP_SYN_SENT);
311
	err = inet6_hash_connect(tcp_death_row, sk);
L
Linus Torvalds 已提交
312 313 314
	if (err)
		goto late_failure;

315
	sk_set_txhash(sk);
316

317 318
	if (likely(!tp->repair)) {
		if (!tp->write_seq)
319 320 321 322 323
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
324 325
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
326
						   sk->sk_v6_daddr.s6_addr32);
327
	}
L
Linus Torvalds 已提交
328

W
Wei Wang 已提交
329 330 331 332 333
	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

L
Linus Torvalds 已提交
334 335 336 337 338 339 340 341 342
	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
E
Eric Dumazet 已提交
343
	inet->inet_dport = 0;
L
Linus Torvalds 已提交
344 345 346 347
	sk->sk_route_caps = 0;
	return err;
}

348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
/* Apply the path MTU stored in tcp_sk(sk)->mtu_info to the socket.
 *
 * Nothing is done for sockets in LISTEN or CLOSE state, or when the PMTU
 * update does not yield a route. If the cached PMTU cookie is larger than
 * the route's MTU, the MSS is resynchronised and a simple retransmit is
 * triggered.
 */
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *route;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	route = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);

	if (route && inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(route)) {
		tcp_sync_mss(sk, dst_mtu(route));
		tcp_simple_retransmit(sk);
	}
}

365
/* Handle an ICMPv6 error (or NDISC redirect) that refers to a TCP segment.
 *
 * @skb->data points at the embedded IPv6 header of the offending packet and
 * @offset is the offset of the embedded TCP header within it. @type, @code
 * and @info come from the ICMPv6 message.
 *
 * Returns -ENOENT when no matching socket is found, 0 otherwise.
 */
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	/* Timewait and request sockets are handled without taking the
	 * socket lock below.
	 */
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	/* Ignore errors quoting a sequence number outside the send window. */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs send out by Linux are always <576bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		tp->mtu_info = ntohl(info);
		/* Apply the new MTU now if the socket is not owned by user
		 * context; otherwise flag it as deferred. The extra
		 * reference is taken only when the flag was not already set.
		 */
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	/* Report a hard error only if the socket is not owned by user
	 * context and np->recverr is set; otherwise record a soft error.
	 */
	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


491
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
492
			      struct flowi *fl,
493
			      struct request_sock *req,
494
			      struct tcp_fastopen_cookie *foc,
495
			      enum tcp_synack_type synack_type)
L
Linus Torvalds 已提交
496
{
497
	struct inet_request_sock *ireq = inet_rsk(req);
E
Eric Dumazet 已提交
498
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
499
	struct ipv6_txoptions *opt;
500
	struct flowi6 *fl6 = &fl->u.ip6;
W
Weilong Chen 已提交
501
	struct sk_buff *skb;
502
	int err = -ENOMEM;
L
Linus Torvalds 已提交
503

504
	/* First, grab a route. */
505 506
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
507
		goto done;
508

509
	skb = tcp_make_synack(sk, dst, req, foc, synack_type);
510

L
Linus Torvalds 已提交
511
	if (skb) {
512 513
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);
L
Linus Torvalds 已提交
514

515
		fl6->daddr = ireq->ir_v6_rmt_addr;
516
		if (np->repflow && ireq->pktopts)
517 518
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

519
		rcu_read_lock();
520 521 522
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
523 524
		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
			       sk->sk_priority);
525
		rcu_read_unlock();
526
		err = net_xmit_eval(err);
L
Linus Torvalds 已提交
527 528 529 530 531 532
	}

done:
	return err;
}

533

534
static void tcp_v6_reqsk_destructor(struct request_sock *req)
L
Linus Torvalds 已提交
535
{
536
	kfree(inet_rsk(req)->ipv6_opt);
537
	kfree_skb(inet_rsk(req)->pktopts);
L
Linus Torvalds 已提交
538 539
}

540
#ifdef CONFIG_TCP_MD5SIG
541
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
542 543
						   const struct in6_addr *addr,
						   int l3index)
544
{
545 546
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
547 548
}

549
static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
550
						const struct sock *addr_sk)
551
{
552 553 554 555 556 557
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
558 559
}

560 561
/* Parse a struct tcp_md5sig passed from user space and add or delete the
 * corresponding MD5 key.
 *
 * A zero tcpm_keylen deletes the key, anything else adds it. V4-mapped
 * addresses are stored as AF_INET keys using the embedded IPv4 address.
 * The prefix length and interface index fields are only honoured for
 * TCP_MD5SIG_EXT with the respective flag set.
 *
 * Returns -EINVAL for a short option, wrong address family, out-of-range
 * prefix length, an ifindex that does not name an L3 master device or an
 * over-long key; -EFAULT if the copy from user space fails; otherwise the
 * result of tcp_md5_do_del()/tcp_md5_do_add().
 */
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	int l3index = 0;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		/* At most 128 bits, or 32 for a v4-mapped address. */
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		/* No explicit prefix: match the full address. */
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, l3index,
				      cmd.tcpm_key, cmd.tcpm_keylen,
				      GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, l3index,
			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

627 628 629 630
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
631 632
{
	struct tcp6_pseudohdr *bp;
633
	struct scatterlist sg;
634
	struct tcphdr *_th;
635

636
	bp = hp->scratch;
637
	/* 1. TCP pseudo-header (RFC2460) */
A
Alexey Dobriyan 已提交
638 639
	bp->saddr = *saddr;
	bp->daddr = *daddr;
640
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
A
Adam Langley 已提交
641
	bp->len = cpu_to_be32(nbytes);
642

643 644 645 646 647 648 649
	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
H
Herbert Xu 已提交
650
	return crypto_ahash_update(hp->md5_req);
651
}
652

653
static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
654
			       const struct in6_addr *daddr, struct in6_addr *saddr,
E
Eric Dumazet 已提交
655
			       const struct tcphdr *th)
656 657
{
	struct tcp_md5sig_pool *hp;
H
Herbert Xu 已提交
658
	struct ahash_request *req;
659 660 661 662

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
H
Herbert Xu 已提交
663
	req = hp->md5_req;
664

H
Herbert Xu 已提交
665
	if (crypto_ahash_init(req))
666
		goto clear_hash;
667
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
668 669 670
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
H
Herbert Xu 已提交
671 672
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
673 674 675 676
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;
677

678 679 680 681
clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
682
	return 1;
683 684
}

685 686
static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
E
Eric Dumazet 已提交
687 688
			       const struct sock *sk,
			       const struct sk_buff *skb)
689
{
690
	const struct in6_addr *saddr, *daddr;
691
	struct tcp_md5sig_pool *hp;
H
Herbert Xu 已提交
692
	struct ahash_request *req;
E
Eric Dumazet 已提交
693
	const struct tcphdr *th = tcp_hdr(skb);
694

695 696
	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
697
		daddr = &sk->sk_v6_daddr;
698
	} else {
699
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
700 701
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
702
	}
703 704 705 706

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
H
Herbert Xu 已提交
707
	req = hp->md5_req;
708

H
Herbert Xu 已提交
709
	if (crypto_ahash_init(req))
710 711
		goto clear_hash;

712
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
713 714 715 716 717
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
H
Herbert Xu 已提交
718 719
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
720 721 722 723 724 725 726 727 728 729
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
730 731
}

732 733 734
#endif

static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
735 736
				    const struct sk_buff *skb,
				    int dif, int sdif)
737
{
738
#ifdef CONFIG_TCP_MD5SIG
739
	const __u8 *hash_location = NULL;
740
	struct tcp_md5sig_key *hash_expected;
741
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
E
Eric Dumazet 已提交
742
	const struct tcphdr *th = tcp_hdr(skb);
743
	int genhash, l3index;
744 745
	u8 newhash[16];

746 747 748 749 750 751
	/* sdif set, means packet ingressed via a device
	 * in an L3 domain and dif is set to the l3mdev
	 */
	l3index = sdif ? dif : 0;

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
752
	hash_location = tcp_parse_md5sig_option(th);
753

754 755
	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
756
		return false;
757 758

	if (hash_expected && !hash_location) {
759
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
760
		return true;
761 762
	}

763
	if (!hash_expected && hash_location) {
764
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
765
		return true;
766 767 768
	}

	/* check the signature */
769 770
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
771
				      NULL, skb);
772

773
	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
774
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
775
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
776 777
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
778
				     &ip6h->daddr, ntohs(th->dest), l3index);
779
		return true;
780
	}
781
#endif
782
	return false;
783 784
}

785 786
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
787 788
			    struct sk_buff *skb)
{
789
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
790
	struct inet_request_sock *ireq = inet_rsk(req);
E
Eric Dumazet 已提交
791
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
792 793 794 795 796

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
797
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
798
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
E
Eric Dumazet 已提交
799
		ireq->ir_iif = tcp_v6_iif(skb);
800

801
	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
802
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
803
	     np->rxopt.bits.rxinfo ||
804 805
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
806
		refcount_inc(&skb->users);
807 808 809 810
		ireq->pktopts = skb;
	}
}

811 812
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct flowi *fl,
813
					  const struct request_sock *req)
814
{
815
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
816 817
}

818
/* Generic request sock operations for TCP over IPv6. */
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

828
const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
829 830
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
831
#ifdef CONFIG_TCP_MD5SIG
832
	.req_md5_lookup	=	tcp_v6_md5_lookup,
833
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
834
#endif
835
	.init_req	=	tcp_v6_init_req,
836 837 838
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
839
	.route_req	=	tcp_v6_route_req,
840 841
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
842
	.send_synack	=	tcp_v6_send_synack,
843
};
844

845
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
846 847
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
848
				 u8 tclass, __be32 label, u32 priority)
L
Linus Torvalds 已提交
849
{
850 851
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
L
Linus Torvalds 已提交
852
	struct sk_buff *buff;
853
	struct flowi6 fl6;
854
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
855
	struct sock *ctl_sk = net->ipv6.tcp_sk;
856
	unsigned int tot_len = sizeof(struct tcphdr);
E
Eric Dumazet 已提交
857
	struct dst_entry *dst;
858
	__be32 *topt;
J
Jon Maxwell 已提交
859
	__u32 mark = 0;
L
Linus Torvalds 已提交
860

861
	if (tsecr)
862
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
863 864 865 866 867 868
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
L
Linus Torvalds 已提交
869
			 GFP_ATOMIC);
870
	if (!buff)
871
		return;
L
Linus Torvalds 已提交
872

873
	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
L
Linus Torvalds 已提交
874

875
	t1 = skb_push(buff, tot_len);
876
	skb_reset_transport_header(buff);
L
Linus Torvalds 已提交
877 878 879 880 881

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
882
	t1->doff = tot_len / 4;
883 884 885 886 887
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);
L
Linus Torvalds 已提交
888

889 890
	topt = (__be32 *)(t1 + 1);

891
	if (tsecr) {
892 893
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
894 895
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
896 897
	}

898 899
#ifdef CONFIG_TCP_MD5SIG
	if (key) {
900 901 902
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
903 904
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
905 906 907
	}
#endif

908
	memset(&fl6, 0, sizeof(fl6));
A
Alexey Dobriyan 已提交
909 910
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
911
	fl6.flowlabel = label;
L
Linus Torvalds 已提交
912

913 914 915
	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

916
	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
L
Linus Torvalds 已提交
917

918
	fl6.flowi6_proto = IPPROTO_TCP;
919
	if (rt6_need_strict(&fl6.daddr) && !oif)
E
Eric Dumazet 已提交
920
		fl6.flowi6_oif = tcp_v6_iif(skb);
921 922 923 924 925 926
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}
927

928 929 930 931 932 933 934 935 936
	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
937
		buff->tstamp = tcp_transmit_time(sk);
938
	}
J
Jon Maxwell 已提交
939
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
940 941
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
942
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
943
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
L
Linus Torvalds 已提交
944

945 946 947 948
	/* Pass a socket to ip6_dst_lookup either it is for RST
	 * Underlying function will use this to retrieve the network
	 * namespace
	 */
949
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
950 951
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
952
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
953
			 priority);
954
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
955
		if (rst)
956
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
957
		return;
L
Linus Torvalds 已提交
958 959 960 961 962
	}

	kfree_skb(buff);
}

963
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
L
Linus Torvalds 已提交
964
{
965
	const struct tcphdr *th = tcp_hdr(skb);
966
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
967
	u32 seq = 0, ack_seq = 0;
968
	struct tcp_md5sig_key *key = NULL;
969 970 971 972 973 974
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
975
	__be32 label = 0;
976
	u32 priority = 0;
977
	struct net *net;
978
	int oif = 0;
L
Linus Torvalds 已提交
979

980
	if (th->rst)
L
Linus Torvalds 已提交
981 982
		return;

983 984 985 986
	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
987
		return;
L
Linus Torvalds 已提交
988

989
	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
990
#ifdef CONFIG_TCP_MD5SIG
991
	rcu_read_lock();
992
	hash_location = tcp_parse_md5sig_option(th);
993
	if (sk && sk_fullsock(sk)) {
994 995 996 997 998 999 1000
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1001
	} else if (hash_location) {
1002 1003
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
1004
		int l3index;
1005

1006 1007 1008 1009 1010 1011 1012
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
1013
		sk1 = inet6_lookup_listener(net,
1014 1015
					   &tcp_hashinfo, NULL, 0,
					   &ipv6h->saddr,
1016
					   th->source, &ipv6h->daddr,
1017
					   ntohs(th->source), dif, sdif);
1018
		if (!sk1)
1019
			goto out;
1020

1021 1022 1023 1024 1025 1026
		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1027
		if (!key)
1028
			goto out;
1029

1030
		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1031
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1032
			goto out;
1033
	}
1034 1035
#endif

1036 1037 1038 1039 1040
	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);
L
Linus Torvalds 已提交
1041

1042 1043
	if (sk) {
		oif = sk->sk_bound_dev_if;
1044 1045 1046
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

1047
			trace_tcp_send_reset(sk, skb);
1048 1049
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
1050
			priority = sk->sk_priority;
1051
		}
1052
		if (sk->sk_state == TCP_TIME_WAIT) {
1053
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1054 1055
			priority = inet_twsk(sk)->tw_priority;
		}
1056
	} else {
1057
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1058
			label = ip6_flowlabel(ipv6h);
1059 1060
	}

1061
	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
1062
			     label, priority);
1063 1064

#ifdef CONFIG_TCP_MD5SIG
1065 1066
out:
	rcu_read_unlock();
1067
#endif
1068
}
L
Linus Torvalds 已提交
1069

1070
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1071
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1072
			    struct tcp_md5sig_key *key, u8 tclass,
1073
			    __be32 label, u32 priority)
1074
{
1075
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1076
			     tclass, label, priority);
L
Linus Torvalds 已提交
1077 1078 1079 1080
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
1081
	struct inet_timewait_sock *tw = inet_twsk(sk);
1082
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
L
Linus Torvalds 已提交
1083

1084
	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1085
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1086
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
W
Wang Yufen 已提交
1087
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1088
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
L
Linus Torvalds 已提交
1089

1090
	inet_twsk_put(tw);
L
Linus Torvalds 已提交
1091 1092
}

1093
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1094
				  struct request_sock *req)
L
Linus Torvalds 已提交
1095
{
1096 1097 1098 1099
	int l3index;

	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

1100 1101 1102
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
1103 1104 1105 1106 1107
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
1108
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1109
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1110 1111
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1112
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1113
			req->ts_recent, sk->sk_bound_dev_if,
1114
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1115
			0, 0, sk->sk_priority);
L
Linus Torvalds 已提交
1116 1117 1118
}


1119
/* Run the SYN-cookie check for a segment that reached socket @sk.
 *
 * With CONFIG_SYN_COOKIES, any segment without the SYN flag is handed to
 * cookie_v6_check() and its result is returned. In every other case @sk is
 * returned unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
/* Compute a SYN cookie for the SYN described by @iph / @th.
 *
 * Returns 0 when CONFIG_SYN_COOKIES is disabled or when
 * tcp_get_syncookie_mss() yields 0; in that case *@cookie is left untouched.
 * Otherwise the cookie sequence number is stored in *@cookie,
 * tcp_synq_overflow() is called on @sk and the MSS (as possibly adjusted by
 * __cookie_v6_init_sequence() through its pointer argument) is returned.
 */
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
#ifdef CONFIG_SYN_COOKIES
	u16 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
					&tcp_request_sock_ipv6_ops, sk, th);

	if (!mss)
		return 0;

	*cookie = __cookie_v6_init_sequence(iph, th, &mss);
	tcp_synq_overflow(sk);
	return mss;
#else
	return 0;
#endif
}

L
Linus Torvalds 已提交
1145 1146 1147 1148 1149 1150
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
1151
		goto drop;
L
Linus Torvalds 已提交
1152

O
Octavian Purdila 已提交
1153 1154
	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);
L
Linus Torvalds 已提交
1155 1156

drop:
1157
	tcp_listendrop(sk);
L
Linus Torvalds 已提交
1158 1159 1160
	return 0; /* don't send reset */
}

1161 1162 1163 1164 1165 1166 1167 1168 1169 1170
/* Copy the saved IPv6 control block from TCP_SKB_CB(skb)->header.h6 back to
 * IP6CB(skb).
 *
 * Needed before xfrm6_policy_check() and tcp_v6_fill_cb() are run on the
 * same skb again; ip6_datagram_recv_specific_ctl() also expects IP6CB to be
 * in place. memmove() is used for the copy.
 */
static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	const void *saved_h6 = &TCP_SKB_CB(skb)->header.h6;

	memmove(IP6CB(skb), saved_h6, sizeof(struct inet6_skb_parm));
}

1171
/* Create the child socket once the handshake for @req has completed.
 *
 * @sk:         listening socket.
 * @skb:        segment that completed the handshake.
 * @req:        request socket describing the connection.
 * @dst:        route to the peer, or NULL to have it looked up here.
 * @req_unhash: request passed to inet_ehash_nolisten() for unhashing.
 * @own_req:    set to the result of inet_ehash_nolisten() (native IPv6 path)
 *              or by tcp_v4_syn_recv_sock() (v4-mapped path).
 *
 * Returns the new socket, or NULL on failure. On the native IPv6 failure
 * paths the listen-drop counter is bumped, and @dst is released when the
 * accept queue is full or the child could not be allocated.
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newinet = inet_sk(newsk);
		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		/* The memcpy() above copied the listener's pointers; the
		 * child must not share them.
		 */
		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	/* opt is re-tested because ipv6_dup_options() may have returned NULL. */
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

/* The socket must have it's spinlock held when we get
1382
 * here, unless it is a TCP_LISTEN socket.
L
Linus Torvalds 已提交
1383 1384 1385 1386 1387 1388 1389 1390
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
E
Eric Dumazet 已提交
1391
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
L
Linus Torvalds 已提交
1392
	struct sk_buff *opt_skb = NULL;
E
Eric Dumazet 已提交
1393
	struct tcp_sock *tp;
L
Linus Torvalds 已提交
1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
1422
					       --ANK (980728)
L
Linus Torvalds 已提交
1423 1424
	 */
	if (np->rxopt.all)
1425
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
L
Linus Torvalds 已提交
1426 1427

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
E
Eric Dumazet 已提交
1428 1429
		struct dst_entry *dst = sk->sk_rx_dst;

1430
		sock_rps_save_rxhash(sk, skb);
1431
		sk_mark_napi_id(sk, skb);
E
Eric Dumazet 已提交
1432 1433 1434 1435 1436 1437 1438 1439
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

1440
		tcp_rcv_established(sk, skb);
L
Linus Torvalds 已提交
1441 1442 1443 1444 1445
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

E
Eric Dumazet 已提交
1446
	if (tcp_checksum_complete(skb))
L
Linus Torvalds 已提交
1447 1448
		goto csum_err;

1449
	if (sk->sk_state == TCP_LISTEN) {
1450 1451
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

L
Linus Torvalds 已提交
1452 1453 1454
		if (!nsk)
			goto discard;

W
Weilong Chen 已提交
1455
		if (nsk != sk) {
L
Linus Torvalds 已提交
1456 1457 1458 1459 1460 1461
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
1462
	} else
1463
		sock_rps_save_rxhash(sk, skb);
L
Linus Torvalds 已提交
1464

1465
	if (tcp_rcv_state_process(sk, skb))
L
Linus Torvalds 已提交
1466 1467 1468 1469 1470 1471
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
1472
	tcp_v6_send_reset(sk, skb);
L
Linus Torvalds 已提交
1473 1474 1475 1476 1477 1478
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
1479 1480
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
L
Linus Torvalds 已提交
1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1495
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
E
Eric Dumazet 已提交
1496
			np->mcast_oif = tcp_v6_iif(opt_skb);
1497
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1498
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1499
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1500
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1501 1502
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1503
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
L
Linus Torvalds 已提交
1504
			skb_set_owner_r(opt_skb, sk);
1505
			tcp_v6_restore_cb(opt_skb);
L
Linus Torvalds 已提交
1506 1507 1508 1509 1510 1511 1512
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

1513
	kfree_skb(opt_skb);
L
Linus Torvalds 已提交
1514 1515 1516
	return 0;
}

1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536
/* Populate TCP_SKB_CB(@skb) from the IPv6 header @hdr and TCP header @th.
 *
 * The IPv6 control block is first moved into TCP_SKB_CB()->header.h6 so it
 * can later be put back by tcp_v6_restore_cb().
 */
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* SYN and FIN each occupy one unit of sequence space. */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

1541
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
L
Linus Torvalds 已提交
1542
{
E
Eric Dumazet 已提交
1543
	struct sk_buff *skb_to_free;
1544
	int sdif = inet6_sdif(skb);
1545
	int dif = inet6_iif(skb);
1546
	const struct tcphdr *th;
1547
	const struct ipv6hdr *hdr;
1548
	bool refcounted;
L
Linus Torvalds 已提交
1549 1550
	struct sock *sk;
	int ret;
1551
	struct net *net = dev_net(skb->dev);
L
Linus Torvalds 已提交
1552 1553 1554 1555 1556 1557 1558

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
E
Eric Dumazet 已提交
1559
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
L
Linus Torvalds 已提交
1560 1561 1562 1563

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

1564
	th = (const struct tcphdr *)skb->data;
L
Linus Torvalds 已提交
1565

1566
	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
L
Linus Torvalds 已提交
1567 1568 1569 1570
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

1571
	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1572
		goto csum_error;
L
Linus Torvalds 已提交
1573

1574
	th = (const struct tcphdr *)skb->data;
1575
	hdr = ipv6_hdr(skb);
L
Linus Torvalds 已提交
1576

1577
lookup:
1578
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1579
				th->source, th->dest, inet6_iif(skb), sdif,
1580
				&refcounted);
L
Linus Torvalds 已提交
1581 1582 1583 1584 1585 1586 1587
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

1588 1589
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
1590
		bool req_stolen = false;
1591
		struct sock *nsk;
1592 1593

		sk = req->rsk_listener;
1594
		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1595
			sk_drops_add(sk, skb);
1596 1597 1598
			reqsk_put(req);
			goto discard_it;
		}
1599 1600 1601 1602
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
1603
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1604
			inet_csk_reqsk_queue_drop_and_put(sk, req);
1605 1606
			goto lookup;
		}
1607
		sock_hold(sk);
1608
		refcounted = true;
E
Eric Dumazet 已提交
1609
		nsk = NULL;
1610 1611 1612 1613
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
1614
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1615
		}
1616 1617
		if (!nsk) {
			reqsk_put(req);
1618 1619 1620 1621 1622 1623 1624 1625 1626 1627
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
1628
			goto discard_and_relse;
1629 1630 1631 1632 1633 1634
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
1635
			goto discard_and_relse;
1636
		} else {
1637
			sock_put(sk);
1638 1639 1640
			return 0;
		}
	}
E
Eric Dumazet 已提交
1641
	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1642
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1643 1644 1645
		goto discard_and_relse;
	}

L
Linus Torvalds 已提交
1646 1647 1648
	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

1649
	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1650 1651
		goto discard_and_relse;

1652
	if (tcp_filter(sk, skb))
L
Linus Torvalds 已提交
1653
		goto discard_and_relse;
1654 1655
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
1656
	tcp_v6_fill_cb(skb, hdr, th);
L
Linus Torvalds 已提交
1657 1658 1659

	skb->dev = NULL;

1660 1661 1662 1663 1664 1665 1666
	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

1667
	bh_lock_sock_nested(sk);
1668
	tcp_segs_in(tcp_sk(sk), skb);
L
Linus Torvalds 已提交
1669 1670
	ret = 0;
	if (!sock_owned_by_user(sk)) {
E
Eric Dumazet 已提交
1671 1672
		skb_to_free = sk->sk_rx_skb_cache;
		sk->sk_rx_skb_cache = NULL;
F
Florian Westphal 已提交
1673
		ret = tcp_v6_do_rcv(sk, skb);
E
Eric Dumazet 已提交
1674 1675 1676 1677
	} else {
		if (tcp_add_backlog(sk, skb))
			goto discard_and_relse;
		skb_to_free = NULL;
Z
Zhu Yi 已提交
1678
	}
L
Linus Torvalds 已提交
1679
	bh_unlock_sock(sk);
E
Eric Dumazet 已提交
1680 1681
	if (skb_to_free)
		__kfree_skb(skb_to_free);
1682
put_and_return:
1683 1684
	if (refcounted)
		sock_put(sk);
L
Linus Torvalds 已提交
1685 1686 1687 1688 1689 1690
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

1691 1692
	tcp_v6_fill_cb(skb, hdr, th);

E
Eric Dumazet 已提交
1693
	if (tcp_checksum_complete(skb)) {
1694
csum_error:
E
Eric Dumazet 已提交
1695
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
L
Linus Torvalds 已提交
1696
bad_packet:
E
Eric Dumazet 已提交
1697
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
L
Linus Torvalds 已提交
1698
	} else {
1699
		tcp_v6_send_reset(NULL, skb);
L
Linus Torvalds 已提交
1700 1701 1702 1703 1704 1705 1706
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
1707
	sk_drops_add(sk, skb);
1708 1709
	if (refcounted)
		sock_put(sk);
L
Linus Torvalds 已提交
1710 1711 1712 1713
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1714
		inet_twsk_put(inet_twsk(sk));
L
Linus Torvalds 已提交
1715 1716 1717
		goto discard_it;
	}

1718 1719
	tcp_v6_fill_cb(skb, hdr, th);

1720 1721 1722
	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
L
Linus Torvalds 已提交
1723 1724
	}

1725
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
L
Linus Torvalds 已提交
1726 1727 1728 1729
	case TCP_TW_SYN:
	{
		struct sock *sk2;

1730
		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1731
					    skb, __tcp_hdrlen(th),
1732
					    &ipv6_hdr(skb)->saddr, th->source,
1733
					    &ipv6_hdr(skb)->daddr,
1734 1735
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
1736
					    sdif);
1737
		if (sk2) {
1738
			struct inet_timewait_sock *tw = inet_twsk(sk);
1739
			inet_twsk_deschedule_put(tw);
L
Linus Torvalds 已提交
1740
			sk = sk2;
1741
			tcp_v6_restore_cb(skb);
1742
			refcounted = false;
L
Linus Torvalds 已提交
1743 1744 1745
			goto process;
		}
	}
1746
		/* to ACK */
J
Joe Perches 已提交
1747
		fallthrough;
L
Linus Torvalds 已提交
1748 1749 1750 1751
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
1752 1753 1754
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
W
Wang Yufen 已提交
1755 1756
	case TCP_TW_SUCCESS:
		;
L
Linus Torvalds 已提交
1757 1758 1759 1760
	}
	goto discard_it;
}

1761
INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
E
Eric Dumazet 已提交
1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

E
Eric Dumazet 已提交
1779
	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
E
Eric Dumazet 已提交
1780 1781 1782
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
1783
					inet6_iif(skb), inet6_sdif(skb));
E
Eric Dumazet 已提交
1784 1785 1786
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
1787
		if (sk_fullsock(sk)) {
1788
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1789

E
Eric Dumazet 已提交
1790
			if (dst)
E
Eric Dumazet 已提交
1791
				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
E
Eric Dumazet 已提交
1792
			if (dst &&
1793
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
E
Eric Dumazet 已提交
1794 1795 1796 1797 1798
				skb_dst_set_noref(skb, dst);
		}
	}
}

1799 1800 1801
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
W
Wang Yufen 已提交
1802
	.twsk_destructor = tcp_twsk_destructor,
1803 1804
};

1805
const struct inet_connection_sock_af_ops ipv6_specific = {
1806 1807 1808
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
E
Eric Dumazet 已提交
1809
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1810 1811 1812
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
1813
	.net_frag_header_len = sizeof(struct frag_hdr),
1814 1815 1816 1817
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1818
#ifdef CONFIG_COMPAT
1819 1820
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
1821
#endif
1822
	.mtu_reduced	   = tcp_v6_mtu_reduced,
L
Linus Torvalds 已提交
1823 1824
};

1825
#ifdef CONFIG_TCP_MD5SIG
/* TCP MD5 signature operations for native IPv6 sockets.
 * Installed as af_specific by tcp_v6_init_sock().
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
1832

L
Linus Torvalds 已提交
1833 1834 1835
/*
 *	TCP over IPv4 via INET6 API
 */
1836
static const struct inet_connection_sock_af_ops ipv6_mapped = {
1837 1838 1839
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
1840
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1841 1842 1843 1844 1845 1846 1847
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1848
#ifdef CONFIG_COMPAT
1849 1850
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
1851
#endif
1852
	.mtu_reduced	   = tcp_v4_mtu_reduced,
L
Linus Torvalds 已提交
1853 1854
};

1855
#ifdef CONFIG_TCP_MD5SIG
/* TCP MD5 signature operations for v4-mapped sockets: IPv4 lookup and hash
 * routines, IPv6 key parsing. Installed by tcp_v6_syn_recv_sock() on
 * v4-mapped children.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
1862

L
Linus Torvalds 已提交
1863 1864 1865 1866 1867
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
1868
	struct inet_connection_sock *icsk = inet_csk(sk);
L
Linus Torvalds 已提交
1869

1870
	tcp_init_sock(sk);
L
Linus Torvalds 已提交
1871

1872
	icsk->icsk_af_ops = &ipv6_specific;
L
Linus Torvalds 已提交
1873

1874
#ifdef CONFIG_TCP_MD5SIG
1875
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1876 1877
#endif

L
Linus Torvalds 已提交
1878 1879 1880
	return 0;
}

1881
/* Tear down a TCP/IPv6 socket: the TCP teardown shared with IPv4 runs first,
 * then the IPv6-specific teardown.
 */
static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

1887
#ifdef CONFIG_PROC_FS
L
Linus Torvalds 已提交
1888
/* Proc filesystem TCPv6 sock list dumping. */
1889
static void get_openreq6(struct seq_file *seq,
E
Eric Dumazet 已提交
1890
			 const struct request_sock *req, int i)
L
Linus Torvalds 已提交
1891
{
1892
	long ttd = req->rsk_timer.expires - jiffies;
1893 1894
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
L
Linus Torvalds 已提交
1895 1896 1897 1898 1899 1900

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1901
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
L
Linus Torvalds 已提交
1902 1903 1904
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
E
Eric Dumazet 已提交
1905
		   inet_rsk(req)->ir_num,
L
Linus Torvalds 已提交
1906 1907
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
1908
		   ntohs(inet_rsk(req)->ir_rmt_port),
L
Linus Torvalds 已提交
1909
		   TCP_SYN_RECV,
W
Weilong Chen 已提交
1910
		   0, 0, /* could print option size, but that is af dependent. */
1911 1912
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
1913
		   req->num_timeout,
E
Eric Dumazet 已提交
1914 1915
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
1916
		   0,  /* non standard timer */
L
Linus Torvalds 已提交
1917 1918 1919 1920 1921 1922
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
1923
	const struct in6_addr *dest, *src;
L
Linus Torvalds 已提交
1924 1925 1926
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
1927 1928
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
1929
	const struct inet_connection_sock *icsk = inet_csk(sp);
1930
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1931 1932
	int rx_queue;
	int state;
L
Linus Torvalds 已提交
1933

1934 1935
	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
E
Eric Dumazet 已提交
1936 1937
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);
1938

1939
	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1940
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1941
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
L
Linus Torvalds 已提交
1942
		timer_active	= 1;
1943 1944
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
L
Linus Torvalds 已提交
1945
		timer_active	= 4;
1946
		timer_expires	= icsk->icsk_timeout;
L
Linus Torvalds 已提交
1947 1948 1949 1950 1951 1952 1953 1954
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

1955
	state = inet_sk_state_load(sp);
1956
	if (state == TCP_LISTEN)
1957
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
1958 1959 1960 1961
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
1962
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1963
				      READ_ONCE(tp->copied_seq), 0);
1964

L
Linus Torvalds 已提交
1965 1966
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1967
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
L
Linus Torvalds 已提交
1968 1969 1970 1971 1972
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
1973
		   state,
1974
		   READ_ONCE(tp->write_seq) - tp->snd_una,
1975
		   rx_queue,
L
Linus Torvalds 已提交
1976
		   timer_active,
1977
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
1978
		   icsk->icsk_retransmits,
1979
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1980
		   icsk->icsk_probes_out,
L
Linus Torvalds 已提交
1981
		   sock_i_ino(sp),
1982
		   refcount_read(&sp->sk_refcnt), sp,
1983 1984
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
W
Wei Wang 已提交
1985
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
I
Ilpo Järvinen 已提交
1986
		   tp->snd_cwnd,
1987
		   state == TCP_LISTEN ?
1988
			fastopenq->max_qlen :
1989
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
L
Linus Torvalds 已提交
1990 1991 1992
		   );
}

1993
static void get_timewait6_sock(struct seq_file *seq,
1994
			       struct inet_timewait_sock *tw, int i)
L
Linus Torvalds 已提交
1995
{
1996
	long delta = tw->tw_timer.expires - jiffies;
1997
	const struct in6_addr *dest, *src;
L
Linus Torvalds 已提交
1998 1999
	__u16 destp, srcp;

2000 2001
	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
L
Linus Torvalds 已提交
2002 2003 2004 2005 2006
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
D
Dan Rosenberg 已提交
2007
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
L
Linus Torvalds 已提交
2008 2009 2010 2011 2012 2013
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
2014
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2015
		   refcount_read(&tw->tw_refcnt), tw);
L
Linus Torvalds 已提交
2016 2017 2018 2019 2020
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
E
Eric Dumazet 已提交
2021
	struct sock *sk = v;
L
Linus Torvalds 已提交
2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

2034 2035 2036
	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
E
Eric Dumazet 已提交
2037
		get_openreq6(seq, v, st->num);
2038 2039
	else
		get_tcp6_sock(seq, v, st->num);
L
Linus Torvalds 已提交
2040 2041 2042 2043
out:
	return 0;
}

2044 2045 2046 2047 2048 2049 2050
/*
 * seq_file operations for the "tcp6" proc entry: iteration uses the
 * tcp_seq_* helpers, only ->show() is defined in this file.
 */
static const struct seq_operations tcp6_seq_ops = {
	.start	= tcp_seq_start,
	.next	= tcp_seq_next,
	.stop	= tcp_seq_stop,
	.show	= tcp6_seq_show,
};

L
Linus Torvalds 已提交
2051 2052 2053 2054
/* Passed as the data argument when the "tcp6" proc entry is created. */
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family = AF_INET6,
};

2055
/*
 * Create the read-only (0444) "tcp6" entry under @net->proc_net.
 *
 * Returns 0 on success, or -ENOMEM when proc_create_net_data()
 * returns NULL.
 */
int __net_init tcp6_proc_init(struct net *net)
{
	return proc_create_net_data("tcp6", 0444, net->proc_net,
				    &tcp6_seq_ops,
				    sizeof(struct tcp_iter_state),
				    &tcp6_seq_afinfo) ? 0 : -ENOMEM;
}

2063
void tcp6_proc_exit(struct net *net)
L
Linus Torvalds 已提交
2064
{
2065
	remove_proc_entry("tcp6", net->proc_net);
L
Linus Torvalds 已提交
2066 2067 2068 2069 2070 2071 2072
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
A
Andrey Ignatov 已提交
2073
	.pre_connect		= tcp_v6_pre_connect,
L
Linus Torvalds 已提交
2074 2075
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
2076
	.accept			= inet_csk_accept,
L
Linus Torvalds 已提交
2077 2078 2079 2080 2081 2082
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
2083
	.keepalive		= tcp_set_keepalive,
L
Linus Torvalds 已提交
2084
	.recvmsg		= tcp_recvmsg,
2085 2086
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
L
Linus Torvalds 已提交
2087
	.backlog_rcv		= tcp_v6_do_rcv,
E
Eric Dumazet 已提交
2088
	.release_cb		= tcp_release_cb,
2089
	.hash			= inet6_hash,
2090 2091
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
L
Linus Torvalds 已提交
2092
	.enter_memory_pressure	= tcp_enter_memory_pressure,
2093
	.leave_memory_pressure	= tcp_leave_memory_pressure,
2094
	.stream_memory_free	= tcp_stream_memory_free,
L
Linus Torvalds 已提交
2095 2096 2097
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
2098
	.orphan_count		= &tcp_orphan_count,
2099
	.sysctl_mem		= sysctl_tcp_mem,
2100 2101
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
L
Linus Torvalds 已提交
2102 2103
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
2104
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2105
	.twsk_prot		= &tcp6_timewait_sock_ops,
2106
	.rsk_prot		= &tcp6_request_sock_ops,
2107
	.h.hashinfo		= &tcp_hashinfo,
2108
	.no_autobind		= true,
2109 2110 2111
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
G
Glauber Costa 已提交
2112
#endif
2113
	.diag_destroy		= tcp_abort,
L
Linus Torvalds 已提交
2114 2115
};

2116 2117 2118
/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
2119
static struct inet6_protocol tcpv6_protocol = {
E
Eric Dumazet 已提交
2120
	.early_demux	=	tcp_v6_early_demux,
2121
	.early_demux_handler =  tcp_v6_early_demux,
L
Linus Torvalds 已提交
2122 2123 2124 2125 2126 2127 2128 2129 2130 2131
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
2132 2133
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
L
Linus Torvalds 已提交
2134 2135
};

2136
/*
 * Per-namespace init: create the control socket stored in
 * @net->ipv6.tcp_sk (PF_INET6, SOCK_RAW, IPPROTO_TCP).
 *
 * Returns whatever inet_ctl_sock_create() returns.
 */
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk,
				    PF_INET6, SOCK_RAW, IPPROTO_TCP,
				    net);
}

2142
/* Per-namespace exit: destroy the control socket in @net->ipv6.tcp_sk. */
static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

2147
/*
 * Batched namespace exit: purge AF_INET6 timewait sockets from
 * tcp_hashinfo.  @net_exit_list is not consulted here.
 */
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
E
Eric W. Biederman 已提交
2153 2154 2155
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
2156 2157
};

2158
/*
 * Register TCPv6: protocol handler, protosw entry, pernet operations,
 * and finally mptcpv6_init().
 *
 * If a step fails, the steps that already succeeded are undone in
 * reverse order and that step's error code is returned.  Returns 0
 * when every step succeeds.
 */
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		return ret;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

	return 0;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	return ret;
}

2191
void tcpv6_exit(void)
2192
{
2193
	unregister_pernet_subsys(&tcpv6_net_ops);
2194 2195
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
L
Linus Torvalds 已提交
2196
}