inet6_hashtables.c 9.4 KB
Newer Older
1 2 3 4 5 6 7
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic INET6 transport hashtables
 *
 * Authors:	Lotsa people, from code originally in tcp, generalised here
 *		by Arnaldo Carvalho de Melo <acme@mandriva.com>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
18
#include <linux/random.h>
19

20
#include <net/addrconf.h>
21 22 23
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
24
#include <net/secure_seq.h>
25
#include <net/ip.h>
26
#include <net/sock_reuseport.h>
27

28 29 30
u32 inet6_ehashfn(const struct net *net,
		  const struct in6_addr *laddr, const u16 lport,
		  const struct in6_addr *faddr, const __be16 fport)
31
{
32 33 34 35 36 37 38 39 40 41 42
	static u32 inet6_ehash_secret __read_mostly;
	static u32 ipv6_hash_secret __read_mostly;

	u32 lhash, fhash;

	net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
	net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));

	lhash = (__force u32)laddr->s6_addr32[3];
	fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);

43
	return __inet6_ehashfn(lhash, lport, fhash, fport,
44
			       inet6_ehash_secret + net_hash_mix(net));
45 46
}

47 48 49 50 51 52
/*
 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */
53 54
struct sock *__inet6_lookup_established(struct net *net,
					struct inet_hashinfo *hashinfo,
55
					   const struct in6_addr *saddr,
A
Al Viro 已提交
56
					   const __be16 sport,
57 58 59 60 61
					   const struct in6_addr *daddr,
					   const u16 hnum,
					   const int dif)
{
	struct sock *sk;
62
	const struct hlist_nulls_node *node;
A
Al Viro 已提交
63
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
64 65 66
	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
67
	unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
68
	unsigned int slot = hash & hashinfo->ehash_mask;
69
	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
70

71 72 73

begin:
	sk_nulls_for_each_rcu(sk, node, &head->chain) {
74 75
		if (sk->sk_hash != hash)
			continue;
76 77
		if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
			continue;
E
Eric Dumazet 已提交
78 79 80
		if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
			goto out;

81 82 83
		if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
			sock_gen_put(sk);
			goto begin;
84
		}
85
		goto found;
86
	}
87 88 89
	if (get_nulls_value(node) != slot)
		goto begin;
out:
E
Eric Dumazet 已提交
90 91
	sk = NULL;
found:
92 93 94 95
	return sk;
}
EXPORT_SYMBOL(__inet6_lookup_established);

96
static inline int compute_score(struct sock *sk, struct net *net,
97 98 99 100 101 102
				const unsigned short hnum,
				const struct in6_addr *daddr,
				const int dif)
{
	int score = -1;

E
Eric Dumazet 已提交
103
	if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
104 105 106
	    sk->sk_family == PF_INET6) {

		score = 1;
107 108
		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
			if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
109 110 111 112 113 114 115 116
				return -1;
			score++;
		}
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)
				return -1;
			score++;
		}
117 118
		if (sk->sk_incoming_cpu == raw_smp_processor_id())
			score++;
119 120 121 122
	}
	return score;
}

123
struct sock *inet6_lookup_listener(struct net *net,
124 125 126
		struct inet_hashinfo *hashinfo,
		struct sk_buff *skb, int doff,
		const struct in6_addr *saddr,
127
		const __be16 sport, const struct in6_addr *daddr,
128
		const unsigned short hnum, const int dif)
129 130
{
	struct sock *sk;
131 132
	const struct hlist_nulls_node *node;
	struct sock *result;
133
	int score, hiscore, matches = 0, reuseport = 0;
134
	bool select_ok = true;
135
	u32 phash = 0;
136 137 138 139 140
	unsigned int hash = inet_lhashfn(net, hnum);
	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];

begin:
	result = NULL;
141
	hiscore = 0;
142 143 144 145 146
	sk_nulls_for_each(sk, node, &ilb->head) {
		score = compute_score(sk, net, hnum, daddr, dif);
		if (score > hiscore) {
			hiscore = score;
			result = sk;
147 148 149 150
			reuseport = sk->sk_reuseport;
			if (reuseport) {
				phash = inet6_ehashfn(net, daddr, hnum,
						      saddr, sport);
151 152 153 154 155 156 157 158 159
				if (select_ok) {
					struct sock *sk2;
					sk2 = reuseport_select_sock(sk, phash,
								    skb, doff);
					if (sk2) {
						result = sk2;
						goto found;
					}
				}
160 161 162 163
				matches = 1;
			}
		} else if (score == hiscore && reuseport) {
			matches++;
164
			if (reciprocal_scale(phash, matches) == 0)
165 166
				result = sk;
			phash = next_pseudo_random32(phash);
167 168
		}
	}
169 170 171 172 173 174 175 176
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
		goto begin;
	if (result) {
177
found:
178 179 180 181 182
		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
			result = NULL;
		else if (unlikely(compute_score(result, net, hnum, daddr,
				  dif) < hiscore)) {
			sock_put(result);
183
			select_ok = false;
184 185 186
			goto begin;
		}
	}
187 188 189 190
	return result;
}
EXPORT_SYMBOL_GPL(inet6_lookup_listener);

191
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
192
			  struct sk_buff *skb, int doff,
A
Al Viro 已提交
193 194
			  const struct in6_addr *saddr, const __be16 sport,
			  const struct in6_addr *daddr, const __be16 dport,
195 196 197 198
			  const int dif)
{
	struct sock *sk;

199 200
	sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
			    ntohs(dport), dif);
201 202 203 204

	return sk;
}
EXPORT_SYMBOL_GPL(inet6_lookup);
205 206 207 208 209 210

static int __inet6_check_established(struct inet_timewait_death_row *death_row,
				     struct sock *sk, const __u16 lport,
				     struct inet_timewait_sock **twp)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
211
	struct inet_sock *inet = inet_sk(sk);
212 213
	const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
	const struct in6_addr *saddr = &sk->sk_v6_daddr;
214
	const int dif = sk->sk_bound_dev_if;
E
Eric Dumazet 已提交
215
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
216 217
	struct net *net = sock_net(sk);
	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
E
Eric Dumazet 已提交
218
						inet->inet_dport);
219
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
220
	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
221
	struct sock *sk2;
222
	const struct hlist_nulls_node *node;
E
Eric Dumazet 已提交
223
	struct inet_timewait_sock *tw = NULL;
224

225
	spin_lock(lock);
226

E
Eric Dumazet 已提交
227
	sk_nulls_for_each(sk2, node, &head->chain) {
228 229
		if (sk2->sk_hash != hash)
			continue;
230

231 232
		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
			if (sk2->sk_state == TCP_TIME_WAIT) {
E
Eric Dumazet 已提交
233 234
				tw = inet_twsk(sk2);
				if (twsk_unique(sk, sk2, twp))
235
					break;
E
Eric Dumazet 已提交
236
			}
237
			goto not_unique;
238
		}
239 240
	}

241
	/* Must record num and sport now. Otherwise we will see
242 243
	 * in hash table socket with a funny identity.
	 */
E
Eric Dumazet 已提交
244 245
	inet->inet_num = lport;
	inet->inet_sport = htons(lport);
246
	sk->sk_hash = hash;
247
	WARN_ON(!sk_unhashed(sk));
248
	__sk_nulls_add_node_rcu(sk, &head->chain);
249
	if (tw) {
250
		sk_nulls_del_node_init_rcu((struct sock *)tw);
251 252
		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
	}
253
	spin_unlock(lock);
254
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
255

256
	if (twp) {
257
		*twp = tw;
258
	} else if (tw) {
259
		/* Silly. Should hash-dance instead... */
260
		inet_twsk_deschedule_put(tw);
261 262 263 264
	}
	return 0;

not_unique:
265
	spin_unlock(lock);
266 267 268
	return -EADDRNOTAVAIL;
}

269
static u32 inet6_sk_port_offset(const struct sock *sk)
270 271
{
	const struct inet_sock *inet = inet_sk(sk);
272 273 274

	return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32,
					  sk->sk_v6_daddr.s6_addr32,
E
Eric Dumazet 已提交
275
					  inet->inet_dport);
276 277 278 279 280
}

int inet6_hash_connect(struct inet_timewait_death_row *death_row,
		       struct sock *sk)
{
281 282 283 284 285
	u32 port_offset = 0;

	if (!inet_sk(sk)->inet_num)
		port_offset = inet6_sk_port_offset(sk);
	return __inet_hash_connect(death_row, sk, port_offset,
286
				   __inet6_check_established);
287 288
}
EXPORT_SYMBOL_GPL(inet6_hash_connect);
289 290 291 292 293

int inet6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		local_bh_disable();
294
		__inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
		local_bh_enable();
	}

	return 0;
}
EXPORT_SYMBOL_GPL(inet6_hash);

/* Decide whether the receive addresses of @sk and @sk2 are equal.
 *
 * match_wildcard == true:  IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
 *                          only, and any IPv4 addresses if not IPv6 only
 * match_wildcard == false: addresses must be exactly the same, i.e.
 *                          IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
 *                          and 0.0.0.0 equals to 0.0.0.0 only
 *
 * When inet6_rcv_saddr() yields no IPv6 address for @sk2, @sk2 is
 * treated as having a v4-mapped address.
 *
 * Returns non-zero when the addresses are considered equal, 0 otherwise.
 */
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
			 bool match_wildcard)
{
	const struct in6_addr *sk2_addr6 = inet6_rcv_saddr(sk2);
	int sk2_v6only = inet_v6_ipv6only(sk2);
	int type1 = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
	int type2 = sk2_addr6 ? ipv6_addr_type(sk2_addr6) : IPV6_ADDR_MAPPED;

	/* if both are mapped, treat as IPv4 */
	if (type1 == IPV6_ADDR_MAPPED && type2 == IPV6_ADDR_MAPPED) {
		if (sk2_v6only)
			return 0;
		if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
			return 1;
		if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
			return match_wildcard;
		return 0;
	}

	if (type1 == IPV6_ADDR_ANY && type2 == IPV6_ADDR_ANY)
		return 1;

	if (match_wildcard) {
		/* A wildcard on one side matches the other side, unless the
		 * wildcard socket is IPv6-only and the other address is
		 * v4-mapped.
		 */
		if (type2 == IPV6_ADDR_ANY &&
		    (!sk2_v6only || type1 != IPV6_ADDR_MAPPED))
			return 1;

		if (type1 == IPV6_ADDR_ANY &&
		    (!ipv6_only_sock(sk) || type2 != IPV6_ADDR_MAPPED))
			return 1;
	}

	return sk2_addr6 &&
	       ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_addr6);
}
EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal);