/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic INET6 transport hashtables
 *
 * Authors:	Lotsa people, from code originally in tcp, generalised here
 *		by Arnaldo Carvalho de Melo <acme@mandriva.com>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/random.h>

#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
#include <net/sock_reuseport.h>

28 29 30
u32 inet6_ehashfn(const struct net *net,
		  const struct in6_addr *laddr, const u16 lport,
		  const struct in6_addr *faddr, const __be16 fport)
31
{
32 33 34 35 36 37 38 39 40 41 42
	static u32 inet6_ehash_secret __read_mostly;
	static u32 ipv6_hash_secret __read_mostly;

	u32 lhash, fhash;

	net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
	net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));

	lhash = (__force u32)laddr->s6_addr32[3];
	fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);

43
	return __inet6_ehashfn(lhash, lport, fhash, fport,
44
			       inet6_ehash_secret + net_hash_mix(net));
45 46
}

47 48 49 50 51 52
/*
 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */
53 54
struct sock *__inet6_lookup_established(struct net *net,
					struct inet_hashinfo *hashinfo,
55
					   const struct in6_addr *saddr,
A
Al Viro 已提交
56
					   const __be16 sport,
57 58 59 60 61
					   const struct in6_addr *daddr,
					   const u16 hnum,
					   const int dif)
{
	struct sock *sk;
62
	const struct hlist_nulls_node *node;
A
Al Viro 已提交
63
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
64 65 66
	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
67
	unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
68
	unsigned int slot = hash & hashinfo->ehash_mask;
69
	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
70

71 72 73

begin:
	sk_nulls_for_each_rcu(sk, node, &head->chain) {
74 75
		if (sk->sk_hash != hash)
			continue;
76 77
		if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
			continue;
E
Eric Dumazet 已提交
78 79 80
		if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
			goto out;

81 82 83
		if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
			sock_gen_put(sk);
			goto begin;
84
		}
85
		goto found;
86
	}
87 88 89
	if (get_nulls_value(node) != slot)
		goto begin;
out:
E
Eric Dumazet 已提交
90 91
	sk = NULL;
found:
92 93 94 95
	return sk;
}
EXPORT_SYMBOL(__inet6_lookup_established);

96
static inline int compute_score(struct sock *sk, struct net *net,
97 98 99 100 101 102
				const unsigned short hnum,
				const struct in6_addr *daddr,
				const int dif)
{
	int score = -1;

E
Eric Dumazet 已提交
103
	if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
104 105 106
	    sk->sk_family == PF_INET6) {

		score = 1;
107 108
		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
			if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
109 110 111 112 113 114 115 116
				return -1;
			score++;
		}
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)
				return -1;
			score++;
		}
117 118
		if (sk->sk_incoming_cpu == raw_smp_processor_id())
			score++;
119 120 121 122
	}
	return score;
}

123
/* called with rcu_read_lock() */
124
struct sock *inet6_lookup_listener(struct net *net,
125 126 127
		struct inet_hashinfo *hashinfo,
		struct sk_buff *skb, int doff,
		const struct in6_addr *saddr,
128
		const __be16 sport, const struct in6_addr *daddr,
129
		const unsigned short hnum, const int dif)
130
{
131 132
	unsigned int hash = inet_lhashfn(net, hnum);
	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
133 134 135
	int score, hiscore = 0, matches = 0, reuseport = 0;
	struct sock *sk, *result = NULL;
	u32 phash = 0;
136

137
	sk_for_each(sk, &ilb->head) {
138 139 140
		score = compute_score(sk, net, hnum, daddr, dif);
		if (score > hiscore) {
			hiscore = score;
141 142 143
			if (reuseport) {
				phash = inet6_ehashfn(net, daddr, hnum,
						      saddr, sport);
144 145 146 147
				result = reuseport_select_sock(sk, phash,
							       skb, doff);
				if (result)
					return result;
148 149
				matches = 1;
			}
150 151
			result = sk;
			reuseport = sk->sk_reuseport;
152 153
		} else if (score == hiscore && reuseport) {
			matches++;
154
			if (reciprocal_scale(phash, matches) == 0)
155 156
				result = sk;
			phash = next_pseudo_random32(phash);
157 158 159 160 161 162
		}
	}
	return result;
}
EXPORT_SYMBOL_GPL(inet6_lookup_listener);

163
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
164
			  struct sk_buff *skb, int doff,
A
Al Viro 已提交
165 166
			  const struct in6_addr *saddr, const __be16 sport,
			  const struct in6_addr *daddr, const __be16 dport,
167 168 169
			  const int dif)
{
	struct sock *sk;
170
	bool refcounted;
171

172
	sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
173 174 175
			    ntohs(dport), dif, &refcounted);
	if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
		sk = NULL;
176 177 178
	return sk;
}
EXPORT_SYMBOL_GPL(inet6_lookup);
179 180 181 182 183 184

static int __inet6_check_established(struct inet_timewait_death_row *death_row,
				     struct sock *sk, const __u16 lport,
				     struct inet_timewait_sock **twp)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
185
	struct inet_sock *inet = inet_sk(sk);
186 187
	const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
	const struct in6_addr *saddr = &sk->sk_v6_daddr;
188
	const int dif = sk->sk_bound_dev_if;
E
Eric Dumazet 已提交
189
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
190 191
	struct net *net = sock_net(sk);
	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
E
Eric Dumazet 已提交
192
						inet->inet_dport);
193
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
194
	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
195
	struct sock *sk2;
196
	const struct hlist_nulls_node *node;
E
Eric Dumazet 已提交
197
	struct inet_timewait_sock *tw = NULL;
198

199
	spin_lock(lock);
200

E
Eric Dumazet 已提交
201
	sk_nulls_for_each(sk2, node, &head->chain) {
202 203
		if (sk2->sk_hash != hash)
			continue;
204

205 206
		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
			if (sk2->sk_state == TCP_TIME_WAIT) {
E
Eric Dumazet 已提交
207 208
				tw = inet_twsk(sk2);
				if (twsk_unique(sk, sk2, twp))
209
					break;
E
Eric Dumazet 已提交
210
			}
211
			goto not_unique;
212
		}
213 214
	}

215
	/* Must record num and sport now. Otherwise we will see
216 217
	 * in hash table socket with a funny identity.
	 */
E
Eric Dumazet 已提交
218 219
	inet->inet_num = lport;
	inet->inet_sport = htons(lport);
220
	sk->sk_hash = hash;
221
	WARN_ON(!sk_unhashed(sk));
222
	__sk_nulls_add_node_rcu(sk, &head->chain);
223
	if (tw) {
224
		sk_nulls_del_node_init_rcu((struct sock *)tw);
225 226
		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
	}
227
	spin_unlock(lock);
228
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
229

230
	if (twp) {
231
		*twp = tw;
232
	} else if (tw) {
233
		/* Silly. Should hash-dance instead... */
234
		inet_twsk_deschedule_put(tw);
235 236 237 238
	}
	return 0;

not_unique:
239
	spin_unlock(lock);
240 241 242
	return -EADDRNOTAVAIL;
}

243
static u32 inet6_sk_port_offset(const struct sock *sk)
244 245
{
	const struct inet_sock *inet = inet_sk(sk);
246 247 248

	return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32,
					  sk->sk_v6_daddr.s6_addr32,
E
Eric Dumazet 已提交
249
					  inet->inet_dport);
250 251 252 253 254
}

int inet6_hash_connect(struct inet_timewait_death_row *death_row,
		       struct sock *sk)
{
255 256 257 258 259
	u32 port_offset = 0;

	if (!inet_sk(sk)->inet_num)
		port_offset = inet6_sk_port_offset(sk);
	return __inet_hash_connect(death_row, sk, port_offset,
260
				   __inet6_check_established);
261 262
}
EXPORT_SYMBOL_GPL(inet6_hash_connect);
263 264 265 266 267

int inet6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		local_bh_disable();
268
		__inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
		local_bh_enable();
	}

	return 0;
}
EXPORT_SYMBOL_GPL(inet6_hash);

/* Decide whether the receive addresses of @sk and @sk2 are equal.
 *
 * With match_wildcard set, IPV6_ADDR_ANY matches any IPv6 address on an
 * IPv6-only socket and additionally any IPv4 address on a socket that is
 * not IPv6-only.  With match_wildcard clear, addresses must be identical:
 * IPV6_ADDR_ANY matches only IPV6_ADDR_ANY and 0.0.0.0 matches only
 * 0.0.0.0.
 *
 * A @sk2 for which inet6_rcv_saddr() yields NULL is treated as carrying
 * a v4-mapped address.
 *
 * Returns nonzero when the addresses are considered equal, 0 otherwise.
 */
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
			 bool match_wildcard)
{
	const struct in6_addr *addr2 = inet6_rcv_saddr(sk2);
	int v6only2 = inet_v6_ipv6only(sk2);
	int type1 = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
	int type2 = IPV6_ADDR_MAPPED;

	if (addr2)
		type2 = ipv6_addr_type(addr2);

	/* Both v4-mapped: compare as IPv4 addresses. */
	if (type1 == IPV6_ADDR_MAPPED && type2 == IPV6_ADDR_MAPPED) {
		if (v6only2)
			return 0;
		if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
			return 1;
		if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
			return match_wildcard;
		return 0;
	}

	if (type1 == IPV6_ADDR_ANY && type2 == IPV6_ADDR_ANY)
		return 1;

	if (match_wildcard) {
		/* Wildcard on sk2, unless it is v6-only and sk is mapped. */
		if (type2 == IPV6_ADDR_ANY &&
		    (!v6only2 || type1 != IPV6_ADDR_MAPPED))
			return 1;

		/* Wildcard on sk, unless it is v6-only and sk2 is mapped. */
		if (type1 == IPV6_ADDR_ANY &&
		    (!ipv6_only_sock(sk) || type2 != IPV6_ADDR_MAPPED))
			return 1;
	}

	if (!addr2)
		return 0;

	return ipv6_addr_equal(&sk->sk_v6_rcv_saddr, addr2) ? 1 : 0;
}
EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal);