diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 755702eefd9cfa9c53e0ab465eb306fb58c02f3d..c5195524d1ef8904af746889e9955271b2ff55c1 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -19,7 +19,7 @@ #define SO_BROADCAST 0x0020 #define SO_LINGER 0x0080 #define SO_OOBINLINE 0x0100 -/* To add :#define SO_REUSEPORT 0x0200 */ +#define SO_REUSEPORT 0x0200 #define SO_TYPE 0x1008 #define SO_ERROR 0x1007 diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index f3f38a0e2ef91e79c0da872bcb83b2d85669b0cd..51c6401582ea677a1d9d211ce3652b833de9516c 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -22,7 +22,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h index 406b5838defd9dbcc28cc1dd6769735986eba7cf..50692b738c756172af61ab84f2f8693495ea6655 100644 --- a/arch/cris/include/uapi/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h @@ -24,7 +24,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index d8e1132a1ab63f99588c8e2cf388fdfbb4b475b7..595391f0f98c65e2bddbcbbe70b7e4d5062082d9 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -22,7 +22,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h index c8b87a828206885d424b28ecbdaf1fd90b685912..43e32621da7d01ab20ad81f0c520d6ee83682434 100644 --- a/arch/h8300/include/uapi/asm/socket.h +++ b/arch/h8300/include/uapi/asm/socket.h @@ -22,7 +22,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index f390896c31048c30135e9b77227c3c729b94e291..c567adc8bea535fb5bcc15156a51f4af55424bf3 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -31,7 +31,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 6a895155e7a39107c20e4365b1a442a38840fa93..519afa2755db5c8738bc7138f67aecc408d27fd2 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -22,7 +22,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 9d11a771392364c34704f40089d02d950af0b313..7e2723637b356d08169ae18c5b82eb1403801a43 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -28,8 +28,7 @@ #define SO_LINGER 0x0080 /* Block on close of a reliable socket to transmit pending data. */ #define SO_OOBINLINE 0x0100 /* Receive out-of-band data in-band. */ -#if 0 -To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ +#define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #endif #define SO_TYPE 0x1008 /* Compatible name for SO_STYLE. */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index ab702c40b30e8173f5ec2d8160d928cd83a5dff1..5c7c7c988544558f632a84ab5f529801a27b8963 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -22,7 +22,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index da2c8d3c209efa2b7cb9c225283fc0ba5cd82510..526e4b9aece0a0480f1631bcb492c83ffac2c735 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -13,7 +13,7 @@ #define SO_BROADCAST 0x0020 #define SO_LINGER 0x0080 #define SO_OOBINLINE 0x0100 -/* To add :#define SO_REUSEPORT 0x0200 */ +#define SO_REUSEPORT 0x0200 #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 #define SO_SNDBUFFORCE 0x100a diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index e6ca31816cc96e62557521eaa15006678fcd65f6..a26dcaece509490746ab8b2a5d254125f6d4571f 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -29,7 +29,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_RCVLOWAT 16 #define SO_SNDLOWAT 17 #define SO_RCVTIMEO 18 diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 9ce60b68f0709468fa454f66d886bb48cb71c525..f99eea7fff0fc4af85cf41ab1fd91ff6e402352b 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -28,7 +28,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index fbbba57547d1f7cf7fa495f36b5dea3696ecc49b..cbbad74b2e06a0c6405ecabd4d054887b404668f 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -15,7 +15,7 @@ #define SO_PEERCRED 0x0040 #define SO_LINGER 0x0080 #define SO_OOBINLINE 0x0100 -/* To add :#define SO_REUSEPORT 0x0200 */ +#define SO_REUSEPORT 0x0200 #define SO_BSDCOMPAT 0x0400 #define SO_RCVLOWAT 0x0800 #define SO_SNDLOWAT 0x1000 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index dbf316487b510e94ea6d95423404102cceebe0d5..35905cb6e419a661c04e24bf205f5c4e1f1982f5 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -32,7 +32,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 diff --git a/include/linux/random.h b/include/linux/random.h index d9846088c2c5fe63ed7b97c0c1a19a5bf23f31dc..347ce553a3065fd5020eb1ab864434db488df27c 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -74,4 +74,10 @@ static inline int arch_get_random_int(unsigned int *v) } #endif +/* Pseudo random number generator from numerical recipes. */ +static inline u32 next_pseudo_random32(u32 seed) +{ + return seed * 1664525 + 1013904223; +} + #endif /* _LINUX_RANDOM_H */ diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 9e34c877a77093ff1257e18941949a12391363d6..7ca75cbbf75e2071ed068276c8f69f1a11990b2a 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -71,6 +71,8 @@ extern struct sock *__inet6_lookup_established(struct net *net, extern struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif); @@ -88,7 +90,8 @@ static inline struct sock *__inet6_lookup(struct net *net, if (sk) return sk; - return inet6_lookup_listener(net, hashinfo, daddr, hnum, dif); + return inet6_lookup_listener(net, hashinfo, saddr, sport, + daddr, hnum, dif); } static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 67a8fa098e3a2b0d456996f78af4ff845db4d079..7b2ae9d37076fb49c6aaa62f110318689d92dda8 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -81,7 +81,9 @@ struct inet_bind_bucket { struct net *ib_net; #endif unsigned short port; - signed short fastreuse; + signed char fastreuse; + signed char fastreuseport; + kuid_t fastuid; int num_owners; struct hlist_node node; struct hlist_head owners; @@ -257,15 +259,19 @@ extern void inet_unhash(struct sock *sk); extern struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + const __be32 saddr, + const __be16 sport, const __be32 daddr, const unsigned short hnum, const int dif); static inline struct sock *inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __inet_lookup_listener(net, hashinfo, daddr, ntohs(dport), dif); + return __inet_lookup_listener(net, hashinfo, saddr, sport, + daddr, ntohs(dport), dif); } /* Socket demux engine toys. */ @@ -358,7 +364,8 @@ static inline struct sock *__inet_lookup(struct net *net, struct sock *sk = __inet_lookup_established(net, hashinfo, saddr, sport, daddr, hnum, dif); - return sk ? : __inet_lookup_listener(net, hashinfo, daddr, hnum, dif); + return sk ? : __inet_lookup_listener(net, hashinfo, saddr, sport, + daddr, hnum, dif); } static inline struct sock *inet_lookup(struct net *net, diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index 75ca9291cf2ce307a8e54dec8a559a65069945f9..36d9379d4c4b495a20b65e448f3ecc4da5b3bf5e 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -82,6 +82,7 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, break; case NFT_LOOKUP_LISTENER: sk = inet_lookup_listener(net, &tcp_hashinfo, + saddr, sport, daddr, dport, in->ifindex); @@ -151,6 +152,7 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, break; case NFT_LOOKUP_LISTENER: sk = inet6_lookup_listener(net, &tcp_hashinfo, + saddr, sport, daddr, ntohs(dport), in->ifindex); diff --git a/include/net/sock.h b/include/net/sock.h index 5a34e2f036575fc2015589f958103d4feba2985f..581dc6bd7dc633a23aef606826023f9b642bdeff 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -140,6 +140,7 @@ typedef __u64 __bitwise __addrpair; * @skc_family: network address family * @skc_state: Connection state * @skc_reuse: %SO_REUSEADDR setting + * @skc_reuseport: %SO_REUSEPORT setting * @skc_bound_dev_if: bound device index if != 0 * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol @@ -179,7 +180,8 @@ struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; - unsigned char skc_reuse; + unsigned char skc_reuse:4; + unsigned char skc_reuseport:4; int skc_bound_dev_if; union { struct hlist_node skc_bind_node; @@ -297,6 +299,7 @@ struct sock { #define sk_family __sk_common.skc_family #define sk_state __sk_common.skc_state #define sk_reuse __sk_common.skc_reuse +#define sk_reuseport __sk_common.skc_reuseport #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 3f6a992014101e004fb1fa64e6cd3bb5d973ef2e..4ef3acbba5daeba94e21ab2a9c4700c9b9cf2108 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -22,8 +22,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ - +#define SO_REUSEPORT 15 #ifndef SO_PASSCRED /* powerpc only differs in these */ #define SO_PASSCRED 16 #define SO_PEERCRED 17 diff --git a/net/core/sock.c b/net/core/sock.c index 8258fb741e9a1557c1345110c9b15958523e6507..235fb89e8973a864978c00acfa0dbc0173762082 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -665,6 +665,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname, case SO_REUSEADDR: sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); break; + case SO_REUSEPORT: + sk->sk_reuseport = valbool; + break; case SO_TYPE: case SO_PROTOCOL: case SO_DOMAIN: @@ -972,6 +975,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_reuse; break; + case SO_REUSEPORT: + v.val = sk->sk_reuseport; + break; + case SO_KEEPALIVE: v.val = sock_flag(sk, SOCK_KEEPOPEN); break; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d0670f00d5243f95bec4536f60edf32fa2ded850..8bb623d357adf9c1ec2b88010338f5be1b3182ae 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -59,6 +59,8 @@ int inet_csk_bind_conflict(const struct sock *sk, struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; + int reuseport = sk->sk_reuseport; + kuid_t uid = sock_i_uid((struct sock *)sk); /* * Unlike other sk lookup places we do not check @@ -73,8 +75,11 @@ int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if (!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) { + if ((!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + (!reuseport || !sk2->sk_reuseport || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, sock_i_uid(sk2))))) { const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || sk2_rcv_saddr == sk_rcv_saddr(sk)) @@ -106,6 +111,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) int ret, attempts = 5; struct net *net = sock_net(sk); int smallest_size = -1, smallest_rover; + kuid_t uid = sock_i_uid(sk); local_bh_disable(); if (!snum) { @@ -125,9 +131,12 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == rover) { - if (tb->fastreuse > 0 && - sk->sk_reuse && - sk->sk_state != TCP_LISTEN && + if (((tb->fastreuse > 0 && + sk->sk_reuse && + sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && + uid_eq(tb->fastuid, uid))) && (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; @@ -185,14 +194,17 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) if (sk->sk_reuse == SK_FORCE_REUSE) goto success; - if (tb->fastreuse > 0 && - sk->sk_reuse && sk->sk_state != TCP_LISTEN && + if (((tb->fastreuse > 0 && + sk->sk_reuse && sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size == -1) { goto success; } else { ret = 1; if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && + if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) || + (sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size != -1 && --attempts >= 0) { spin_unlock(&head->lock); goto again; @@ -212,9 +224,23 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) tb->fastreuse = 1; else tb->fastreuse = 0; - } else if (tb->fastreuse && - (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) - tb->fastreuse = 0; + if (sk->sk_reuseport) { + tb->fastreuseport = 1; + tb->fastuid = uid; + } else { + tb->fastreuseport = 0; + tb->fastuid = 0; + } + } else { + if (tb->fastreuse && + (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) + tb->fastreuse = 0; + if (tb->fastreuseport && + (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) { + tb->fastreuseport = 0; + tb->fastuid = 0; + } + } success: if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fa3ae814871082e22121855a4033ddc4ec21b1c7..0ce0595d98618492ce17c285aadba6076dcec9ce 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -39,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, write_pnet(&tb->ib_net, hold_net(net)); tb->port = snum; tb->fastreuse = 0; + tb->fastreuseport = 0; tb->num_owners = 0; INIT_HLIST_HEAD(&tb->owners); hlist_add_head(&tb->node, &head->chain); @@ -151,16 +152,16 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && !ipv6_only_sock(sk)) { __be32 rcv_saddr = inet->inet_rcv_saddr; - score = sk->sk_family == PF_INET ? 1 : 0; + score = sk->sk_family == PF_INET ? 2 : 1; if (rcv_saddr) { if (rcv_saddr != daddr) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -176,6 +177,7 @@ static inline int compute_score(struct sock *sk, struct net *net, struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + const __be32 saddr, __be16 sport, const __be32 daddr, const unsigned short hnum, const int dif) { @@ -183,17 +185,29 @@ struct sock *__inet_lookup_listener(struct net *net, struct hlist_nulls_node *node; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - int score, hiscore; + int score, hiscore, matches = 0, reuseport = 0; + u32 phash = 0; rcu_read_lock(); begin: result = NULL; - hiscore = -1; + hiscore = 0; sk_nulls_for_each_rcu(sk, node, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { result = sk; hiscore = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + phash = inet_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == hiscore && reuseport) { + matches++; + if (((u64)phash * matches) >> 32 == 0) + result = sk; + phash = next_pseudo_random32(phash); } } /* @@ -501,7 +515,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, inet_bind_bucket_for_each(tb, node, &head->chain) { if (net_eq(ib_net(tb), net) && tb->port == port) { - if (tb->fastreuse >= 0) + if (tb->fastreuse >= 0 || + tb->fastreuseport >= 0) goto next_port; WARN_ON(hlist_empty(&tb->owners)); if (!check_established(death_row, sk, @@ -518,6 +533,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, break; } tb->fastreuse = -1; + tb->fastreuseport = -1; goto ok; next_port: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c6ce9ca98d23781e83659de51aecd8808ad4adb3..bbbdcc5c19731d596955eadde274e6385f950828 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -657,7 +657,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) * no RST generated if md5 hash doesn't match. */ sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, ip_hdr(skb)->daddr, + &tcp_hashinfo, ip_hdr(skb)->saddr, + th->source, ip_hdr(skb)->daddr, ntohs(th->source), inet_iif(skb)); /* don't send rst if it can't find key */ if (!sk1) @@ -2074,6 +2075,7 @@ int tcp_v4_rcv(struct sk_buff *skb) case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); if (sk2) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf6158f1f46bd6ac156ece571ed965e038b9376f..e0610e4b51588179b27f32fd2f4d09dd29f754fe 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -139,6 +139,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, { struct sock *sk2; struct hlist_nulls_node *node; + kuid_t uid = sock_i_uid(sk); sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && @@ -147,6 +148,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (!sk2->sk_reuseport || !sk->sk_reuseport || + !uid_eq(uid, sock_i_uid(sk2))) && (*saddr_comp)(sk, sk2)) { if (bitmap) __set_bit(udp_sk(sk2)->udp_port_hash >> log, @@ -169,6 +172,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, { struct sock *sk2; struct hlist_nulls_node *node; + kuid_t uid = sock_i_uid(sk); int res = 0; spin_lock(&hslot2->lock); @@ -179,6 +183,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (!sk2->sk_reuseport || !sk->sk_reuseport || + !uid_eq(uid, sock_i_uid(sk2))) && (*saddr_comp)(sk, sk2)) { res = 1; break; @@ -337,26 +343,26 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, !ipv6_only_sock(sk)) { struct inet_sock *inet = inet_sk(sk); - score = (sk->sk_family == PF_INET ? 1 : 0); + score = (sk->sk_family == PF_INET ? 2 : 1); if (inet->inet_rcv_saddr) { if (inet->inet_rcv_saddr != daddr) return -1; - score += 2; + score += 4; } if (inet->inet_daddr) { if (inet->inet_daddr != saddr) return -1; - score += 2; + score += 4; } if (inet->inet_dport) { if (inet->inet_dport != sport) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -365,7 +371,6 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, /* * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) */ -#define SCORE2_MAX (1 + 2 + 2 + 2) static inline int compute_score2(struct sock *sk, struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif) @@ -380,21 +385,21 @@ static inline int compute_score2(struct sock *sk, struct net *net, if (inet->inet_num != hnum) return -1; - score = (sk->sk_family == PF_INET ? 1 : 0); + score = (sk->sk_family == PF_INET ? 2 : 1); if (inet->inet_daddr) { if (inet->inet_daddr != saddr) return -1; - score += 2; + score += 4; } if (inet->inet_dport) { if (inet->inet_dport != sport) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -409,19 +414,29 @@ static struct sock *udp4_lib_lookup2(struct net *net, { struct sock *sk, *result; struct hlist_nulls_node *node; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; begin: result = NULL; - badness = -1; + badness = 0; udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { score = compute_score2(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { result = sk; badness = score; - if (score == SCORE2_MAX) - goto exact_match; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet_ehashfn(net, daddr, hnum, + saddr, htons(sport)); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -431,9 +446,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, */ if (get_nulls_value(node) != slot2) goto begin; - if (result) { -exact_match: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, @@ -457,7 +470,8 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; rcu_read_lock(); if (hslot->count > 10) { @@ -486,13 +500,24 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, } begin: result = NULL; - badness = -1; + badness = 0; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet_ehashfn(net, daddr, hnum, + saddr, htons(sport)); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 30647857a375bce469c50071636b5aef6c5a33d7..e4297a393678c5e7fadcd3119f3ce53301da07a0 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -32,6 +32,9 @@ int inet6_csk_bind_conflict(const struct sock *sk, { const struct sock *sk2; const struct hlist_node *node; + int reuse = sk->sk_reuse; + int reuseport = sk->sk_reuseport; + int uid = sock_i_uid((struct sock *)sk); /* We must walk the whole port owner list in this case. -DaveM */ /* @@ -42,11 +45,17 @@ int inet6_csk_bind_conflict(const struct sock *sk, if (sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && - (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if ((!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + (!reuseport || !sk2->sk_reuseport || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, + sock_i_uid((struct sock *)sk2))))) { + if (ipv6_rcv_saddr_equal(sk, sk2)) + break; + } + } } return node != NULL; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index dea17fd28e5037295d56cf517671b479a1bca825..32b4a1675d826d8ce50e36dbf314456075e1660e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -158,25 +158,38 @@ static inline int compute_score(struct sock *sk, struct net *net, } struct sock *inet6_lookup_listener(struct net *net, - struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, + struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, + const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { struct sock *sk; const struct hlist_nulls_node *node; struct sock *result; - int score, hiscore; + int score, hiscore, matches = 0, reuseport = 0; + u32 phash = 0; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; rcu_read_lock(); begin: result = NULL; - hiscore = -1; + hiscore = 0; sk_nulls_for_each(sk, node, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { hiscore = score; result = sk; + reuseport = sk->sk_reuseport; + if (reuseport) { + phash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == hiscore && reuseport) { + matches++; + if (((u64)phash * matches) >> 32 == 0) + result = sk; + phash = next_pseudo_random32(phash); } } /* diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3701c3c6e2eb72792cdd9bf3f94ff3f2b421dd49..06087e58738a553ce2ff050cb6bbbcac5d393014 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -834,7 +834,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) * no RST generated if md5 hash doesn't match. */ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, &ipv6h->daddr, + &tcp_hashinfo, &ipv6h->saddr, + th->source, &ipv6h->daddr, ntohs(th->source), inet6_iif(skb)); if (!sk1) return; @@ -1598,6 +1599,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) struct sock *sk2; sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); if (sk2 != NULL) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1afb635d9b57b0aecc86269bfc07ee39b8add330..cb5bf497c09c4e1ebffa8d2c2ad84c9fca678a15 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -203,7 +204,8 @@ static struct sock *udp6_lib_lookup2(struct net *net, { struct sock *sk, *result; struct hlist_nulls_node *node; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; begin: result = NULL; @@ -214,8 +216,18 @@ static struct sock *udp6_lib_lookup2(struct net *net, if (score > badness) { result = sk; badness = score; - if (score == SCORE2_MAX) + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } else if (score == SCORE2_MAX) goto exact_match; + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -249,7 +261,8 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; rcu_read_lock(); if (hslot->count > 10) { @@ -284,6 +297,17 @@ struct sock *__udp6_lib_lookup(struct net *net, if (score > badness) { result = sk; badness = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = inet6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /*