diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index f1f32af6d9b96c8d21f55fe6e875673c15e0eab2..0ef2a97ccdb50bc83baaf727f37d2fa034eea036 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -264,9 +264,9 @@ struct tcp6_timewait_sock {
 };
 
 #if IS_ENABLED(CONFIG_IPV6)
-static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
+static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk)
 {
-	return inet_sk(__sk)->pinet6;
+	return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL;
 }
 
 static inline struct raw6_sock *raw6_sk(const struct sock *sk)
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 47eb67b08abdf28b185514cfc1a99685a8c8b8dd..f5bf7310e3343468bddf7e51940a77ed497ad7f1 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -245,7 +245,8 @@ static inline unsigned int __inet_ehashfn(const __be32 laddr,
 }
 
 struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
-				      struct sock *sk_listener);
+				      struct sock *sk_listener,
+				      bool attach_listener);
 
 static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
 {
diff --git a/include/net/ip.h b/include/net/ip.h
index 91a6b2c88341b8d691af61db57fe77222d7e144f..aa7811993276fbc8fe10baff41b331fab0e5ea11 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -323,12 +323,15 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
 
 static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
 {
-	if (!skb->sk || ip_sk_use_pmtu(skb->sk)) {
+	struct sock *sk = skb->sk;
+
+	if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
 		bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
+
 		return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
-	} else {
-		return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
 	}
+
+	return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
 }
 
 u32 ip_idents_reserve(u32 hash, int segs);
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index dd423d84085247bd71e14c217876eeb653ac7a73..95ab5d7aab96f970a696b3c9f1112887810388d7 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -80,7 +80,8 @@ static inline struct sock *req_to_sk(struct request_sock *req)
 }
 
 static inline struct request_sock *
-reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
+reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
+	    bool attach_listener)
 {
 	struct request_sock *req;
 
@@ -88,10 +89,15 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
 
 	if (req) {
 		req->rsk_ops = ops;
-		sock_hold(sk_listener);
-		req->rsk_listener = sk_listener;
+		if (attach_listener) {
+			sock_hold(sk_listener);
+			req->rsk_listener = sk_listener;
+		} else {
+			req->rsk_listener = NULL;
+		}
 		req_to_sk(req)->sk_prot = sk_listener->sk_prot;
 		sk_node_init(&req_to_sk(req)->sk_node);
+		sk_tx_queue_clear(req_to_sk(req));
 		req->saved_syn = NULL;
 		/* Following is temporary. It is coupled with debugging
 		 * helpers in reqsk_put() & reqsk_free()
diff --git a/net/core/dev.c b/net/core/dev.c
index 323c04edd779af156c19538aa9cee649dc039811..a229bf0d649dc9bf58ece00e95d58f8a133addca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2974,6 +2974,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 			new_index = skb_tx_hash(dev, skb);
 
 		if (queue_index != new_index && sk &&
+		    sk_fullsock(sk) &&
 		    rcu_access_pointer(sk->sk_dst_cache))
 			sk_tx_queue_set(sk, new_index);
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 8910c95677194f4f0261646dc97f0531cbc07670..8e99681c8189d82e3e5f6a3c6089b103c0df2ca3 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -595,7 +595,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = inet_reqsk_alloc(&dccp_request_sock_ops, sk);
+	req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true);
 	if (req == NULL)
 		goto drop;
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 1361a3f45df7635349f13e21db555f1180f174e1..aed314f8c7c60b00191aabc42c733f02304b18b0 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -319,7 +319,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk);
+	req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
 	if (req == NULL)
 		goto drop;
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 729ceb5f63c69dc2270aafb81acd2a16ae791f33..8113c30ccf9641a76b1082bc31a9d87c76379f75 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -326,7 +326,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 		goto out;
 
 	ret = NULL;
-	req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */
+	req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
 	if (!req)
 		goto out;
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 410ac481fda03cb7dc4250e736fc4523e16d53cb..93396bf7b475f3972d247d282faa406d962696dd 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -168,8 +168,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
 
 	atomic_set(&req->rsk_refcnt, 2);
-	/* Add the child socket directly into the accept queue */
-	inet_csk_reqsk_queue_add(sk, req, child);
 
 	/* Now finish processing the fastopen child socket. */
 	inet_csk(child)->icsk_af_ops->rebuild_header(child);
@@ -178,12 +176,10 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 	tcp_init_metrics(child);
 	tcp_init_buffer_space(child);
 
-	/* Queue the data carried in the SYN packet. We need to first
-	 * bump skb's refcnt because the caller will attempt to free it.
-	 * Note that IPv6 might also have used skb_get() trick
-	 * in tcp_v6_conn_request() to keep this SYN around (treq->pktopts)
-	 * So we need to eventually get a clone of the packet,
-	 * before inserting it in sk_receive_queue.
+	/* Queue the data carried in the SYN packet.
+	 * We used to play tricky games with skb_get().
+	 * With lockless listener, it is a dead end.
+	 * Do not think about it.
 	 *
 	 * XXX (TFO) - we honor a zero-payload TFO request for now,
 	 * (any reason not to?) but no need to queue the skb since
@@ -191,12 +187,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 	 */
 	end_seq = TCP_SKB_CB(skb)->end_seq;
 	if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
-		struct sk_buff *skb2;
-
-		if (unlikely(skb_shared(skb)))
-			skb2 = skb_clone(skb, GFP_ATOMIC);
-		else
-			skb2 = skb_get(skb);
+		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
 		if (likely(skb2)) {
 			skb_dst_drop(skb2);
@@ -214,12 +205,9 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 		}
 	}
 	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
-	sk->sk_data_ready(sk);
-	bh_unlock_sock(child);
-	/* Note: sock_put(child) will be done by tcp_conn_request()
-	 * after SYNACK packet is sent.
+	/* tcp_conn_request() is sending the SYNACK,
+	 * and queues the child into listener accept queue.
 	 */
-	WARN_ON(!req->sk);
 	return child;
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 27108757c310db853525b4189ad06af556741ec7..ddadb318e8507fa5be424f98221fba2079112afd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6042,9 +6042,11 @@ static void tcp_openreq_init(struct request_sock *req,
 }
 
 struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
-				      struct sock *sk_listener)
+				      struct sock *sk_listener,
+				      bool attach_listener)
 {
-	struct request_sock *req = reqsk_alloc(ops, sk_listener);
+	struct request_sock *req = reqsk_alloc(ops, sk_listener,
+					       attach_listener);
 
 	if (req) {
 		struct inet_request_sock *ireq = inet_rsk(req);
@@ -6143,7 +6145,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		goto drop;
 	}
 
-	req = inet_reqsk_alloc(rsk_ops, sk);
+	req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
 	if (!req)
 		goto drop;
 
@@ -6229,12 +6231,16 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_rsk(req)->txhash = net_tx_rndhash();
 	tcp_openreq_init_rwin(req, sk, dst);
 	if (!want_cookie) {
-		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
 		tcp_reqsk_record_syn(sk, req, skb);
+		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
 	}
 	if (fastopen_sk) {
 		af_ops->send_synack(fastopen_sk, dst, &fl, req,
 				    skb_get_queue_mapping(skb), &foc, false);
+		/* Add the child socket directly into the accept queue */
+		inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
+		sk->sk_data_ready(sk);
+		bh_unlock_sock(fastopen_sk);
 		sock_put(fastopen_sk);
 	} else {
 		tcp_rsk(req)->tfo_listener = false;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 7606eba83e7b36f803271cd6e21ef5ea81aa0315..f610b5310b177959640dd071a5f2882c2ba1857f 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -170,7 +170,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		goto out;
 
 	ret = NULL;
-	req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk);
+	req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
 	if (!req)
 		goto out;