inet6_hashtables.c 9.1 KB
Newer Older
1 2 3 4 5 6 7
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic INET6 transport hashtables
 *
 * Authors:	Lotsa people, from code originally in tcp, generalised here
 *		by Arnaldo Carvalho de Melo <acme@mandriva.com>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
18
#include <linux/random.h>
19 20 21 22

#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
23
#include <net/secure_seq.h>
24
#include <net/ip.h>
25

26 27 28
u32 inet6_ehashfn(const struct net *net,
		  const struct in6_addr *laddr, const u16 lport,
		  const struct in6_addr *faddr, const __be16 fport)
29
{
30 31 32 33 34 35 36 37 38 39 40
	static u32 inet6_ehash_secret __read_mostly;
	static u32 ipv6_hash_secret __read_mostly;

	u32 lhash, fhash;

	net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
	net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));

	lhash = (__force u32)laddr->s6_addr32[3];
	fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);

41
	return __inet6_ehashfn(lhash, lport, fhash, fport,
42
			       inet6_ehash_secret + net_hash_mix(net));
43 44
}

45 46 47 48 49 50
/*
 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */
51 52
struct sock *__inet6_lookup_established(struct net *net,
					struct inet_hashinfo *hashinfo,
53
					   const struct in6_addr *saddr,
A
Al Viro 已提交
54
					   const __be16 sport,
55 56 57 58 59
					   const struct in6_addr *daddr,
					   const u16 hnum,
					   const int dif)
{
	struct sock *sk;
60
	const struct hlist_nulls_node *node;
A
Al Viro 已提交
61
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
62 63 64
	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
65
	unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
66
	unsigned int slot = hash & hashinfo->ehash_mask;
67
	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
68

69 70 71 72

	rcu_read_lock();
begin:
	sk_nulls_for_each_rcu(sk, node, &head->chain) {
73 74
		if (sk->sk_hash != hash)
			continue;
75 76
		if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
			continue;
E
Eric Dumazet 已提交
77 78 79
		if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
			goto out;

80 81 82
		if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
			sock_gen_put(sk);
			goto begin;
83
		}
84
		goto found;
85
	}
86 87 88
	if (get_nulls_value(node) != slot)
		goto begin;
out:
E
Eric Dumazet 已提交
89 90
	sk = NULL;
found:
91
	rcu_read_unlock();
92 93 94 95
	return sk;
}
EXPORT_SYMBOL(__inet6_lookup_established);

96
static inline int compute_score(struct sock *sk, struct net *net,
97 98 99 100 101 102
				const unsigned short hnum,
				const struct in6_addr *daddr,
				const int dif)
{
	int score = -1;

E
Eric Dumazet 已提交
103
	if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
104 105 106
	    sk->sk_family == PF_INET6) {

		score = 1;
107 108
		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
			if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
109 110 111 112 113 114 115 116
				return -1;
			score++;
		}
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)
				return -1;
			score++;
		}
117 118
		if (sk->sk_incoming_cpu == raw_smp_processor_id())
			score++;
119 120 121 122
	}
	return score;
}

123
struct sock *inet6_lookup_listener(struct net *net,
124 125
		struct inet_hashinfo *hashinfo, const struct in6_addr *saddr,
		const __be16 sport, const struct in6_addr *daddr,
126
		const unsigned short hnum, const int dif)
127 128
{
	struct sock *sk;
129 130
	const struct hlist_nulls_node *node;
	struct sock *result;
131 132
	int score, hiscore, matches = 0, reuseport = 0;
	u32 phash = 0;
133 134 135 136 137 138
	unsigned int hash = inet_lhashfn(net, hnum);
	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];

	rcu_read_lock();
begin:
	result = NULL;
139
	hiscore = 0;
140 141 142 143 144
	sk_nulls_for_each(sk, node, &ilb->head) {
		score = compute_score(sk, net, hnum, daddr, dif);
		if (score > hiscore) {
			hiscore = score;
			result = sk;
145 146 147 148 149 150 151 152
			reuseport = sk->sk_reuseport;
			if (reuseport) {
				phash = inet6_ehashfn(net, daddr, hnum,
						      saddr, sport);
				matches = 1;
			}
		} else if (score == hiscore && reuseport) {
			matches++;
153
			if (reciprocal_scale(phash, matches) == 0)
154 155
				result = sk;
			phash = next_pseudo_random32(phash);
156 157
		}
	}
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
		goto begin;
	if (result) {
		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
			result = NULL;
		else if (unlikely(compute_score(result, net, hnum, daddr,
				  dif) < hiscore)) {
			sock_put(result);
			goto begin;
		}
	}
	rcu_read_unlock();
175 176 177 178
	return result;
}
EXPORT_SYMBOL_GPL(inet6_lookup_listener);

179
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
A
Al Viro 已提交
180 181
			  const struct in6_addr *saddr, const __be16 sport,
			  const struct in6_addr *daddr, const __be16 dport,
182 183 184 185 186
			  const int dif)
{
	struct sock *sk;

	local_bh_disable();
187
	sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif);
188 189 190 191 192
	local_bh_enable();

	return sk;
}
EXPORT_SYMBOL_GPL(inet6_lookup);
193 194 195 196 197 198

static int __inet6_check_established(struct inet_timewait_death_row *death_row,
				     struct sock *sk, const __u16 lport,
				     struct inet_timewait_sock **twp)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
199
	struct inet_sock *inet = inet_sk(sk);
200 201
	const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
	const struct in6_addr *saddr = &sk->sk_v6_daddr;
202
	const int dif = sk->sk_bound_dev_if;
E
Eric Dumazet 已提交
203
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
204 205
	struct net *net = sock_net(sk);
	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
E
Eric Dumazet 已提交
206
						inet->inet_dport);
207
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
208
	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
209
	struct sock *sk2;
210
	const struct hlist_nulls_node *node;
E
Eric Dumazet 已提交
211
	struct inet_timewait_sock *tw = NULL;
212

213
	spin_lock(lock);
214

E
Eric Dumazet 已提交
215
	sk_nulls_for_each(sk2, node, &head->chain) {
216 217
		if (sk2->sk_hash != hash)
			continue;
218

219 220
		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
			if (sk2->sk_state == TCP_TIME_WAIT) {
E
Eric Dumazet 已提交
221 222
				tw = inet_twsk(sk2);
				if (twsk_unique(sk, sk2, twp))
223
					break;
E
Eric Dumazet 已提交
224
			}
225
			goto not_unique;
226
		}
227 228
	}

229
	/* Must record num and sport now. Otherwise we will see
230 231
	 * in hash table socket with a funny identity.
	 */
E
Eric Dumazet 已提交
232 233
	inet->inet_num = lport;
	inet->inet_sport = htons(lport);
234
	sk->sk_hash = hash;
235
	WARN_ON(!sk_unhashed(sk));
236
	__sk_nulls_add_node_rcu(sk, &head->chain);
237
	if (tw) {
238
		sk_nulls_del_node_init_rcu((struct sock *)tw);
239 240
		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
	}
241
	spin_unlock(lock);
242
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
243

244
	if (twp) {
245
		*twp = tw;
246
	} else if (tw) {
247
		/* Silly. Should hash-dance instead... */
248
		inet_twsk_deschedule_put(tw);
249 250 251 252
	}
	return 0;

not_unique:
253
	spin_unlock(lock);
254 255 256
	return -EADDRNOTAVAIL;
}

257
static u32 inet6_sk_port_offset(const struct sock *sk)
258 259
{
	const struct inet_sock *inet = inet_sk(sk);
260 261 262

	return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32,
					  sk->sk_v6_daddr.s6_addr32,
E
Eric Dumazet 已提交
263
					  inet->inet_dport);
264 265 266 267 268
}

int inet6_hash_connect(struct inet_timewait_death_row *death_row,
		       struct sock *sk)
{
269 270 271 272 273
	u32 port_offset = 0;

	if (!inet_sk(sk)->inet_num)
		port_offset = inet6_sk_port_offset(sk);
	return __inet_hash_connect(death_row, sk, port_offset,
274
				   __inet6_check_established);
275 276
}
EXPORT_SYMBOL_GPL(inet6_hash_connect);
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332

/*
 * Insert @sk into the hash tables via __inet_hash(), with bottom halves
 * disabled around the call.  Sockets in TCP_CLOSE state are left alone.
 * Always returns 0.
 */
int inet6_hash(struct sock *sk)
{
	if (sk->sk_state == TCP_CLOSE)
		return 0;

	local_bh_disable();
	__inet_hash(sk, NULL);
	local_bh_enable();

	return 0;
}
EXPORT_SYMBOL_GPL(inet6_hash);

/* Compare the bound receive addresses of two sockets.
 *
 * match_wildcard == true:  IPV6_ADDR_ANY matches any IPv6 address if the
 *                          socket is IPv6-only, and also any IPv4 address
 *                          if it is not IPv6-only
 * match_wildcard == false: addresses must be exactly the same, i.e.
 *                          IPV6_ADDR_ANY only matches IPV6_ADDR_ANY,
 *                          and 0.0.0.0 only matches 0.0.0.0
 *
 * Returns non-zero when the addresses are considered equal, 0 otherwise.
 */
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
			 bool match_wildcard)
{
	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
	int sk2_ipv6only = inet_v6_ipv6only(sk2);
	int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
	int addr_type2 = IPV6_ADDR_MAPPED;

	/* sk2 without an IPv6 receive address is handled as v4-mapped. */
	if (sk2_rcv_saddr6)
		addr_type2 = ipv6_addr_type(sk2_rcv_saddr6);

	/* if both are mapped, treat as IPv4 */
	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
		if (sk2_ipv6only)
			return 0;
		if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
			return 1;
		if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
			return match_wildcard;
		return 0;
	}

	if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
		return 1;

	if (match_wildcard) {
		/* sk2 is a wildcard, unless it is v6-only and sk is mapped. */
		if (addr_type2 == IPV6_ADDR_ANY &&
		    (!sk2_ipv6only || addr_type != IPV6_ADDR_MAPPED))
			return 1;

		/* sk is a wildcard, unless it is v6-only and sk2 is mapped. */
		if (addr_type == IPV6_ADDR_ANY &&
		    (!ipv6_only_sock(sk) || addr_type2 != IPV6_ADDR_MAPPED))
			return 1;
	}

	if (!sk2_rcv_saddr6)
		return 0;

	return ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6) ? 1 : 0;
}
EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal);