/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic INET transport hashtables
 *
 * Authors:	Lotsa people, from code originally in tcp
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>

#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>

/* Hash an IPv4 4-tuple into the established-connections hash table.
 * The secret is generated lazily on first use and then stays fixed, so
 * hashes are stable for the system's lifetime but unpredictable to
 * remote attackers.
 */
static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
			const __u16 lport, const __be32 faddr,
			const __be16 fport)
{
	static u32 inet_ehash_secret __read_mostly;

	/* One-time lazy initialization, safe for concurrent callers. */
	net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));

	/* net_hash_mix() perturbs the secret per network namespace so the
	 * same 4-tuple hashes differently in different netns.
	 */
	return __inet_ehashfn(laddr, lport, faddr, fport,
			      inet_ehash_secret + net_hash_mix(net));
}

/* This function handles inet_sock, but also timewait and request sockets
 * for IPv4/IPv6.
 */
u32 sk_ehashfn(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	/* Genuine IPv6 sockets use the IPv6 ehash; v4-mapped addresses
	 * fall through so they land in the same chains as plain IPv4.
	 */
	if (sk->sk_family == AF_INET6 &&
	    !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		return inet6_ehashfn(sock_net(sk),
				     &sk->sk_v6_rcv_saddr, sk->sk_num,
				     &sk->sk_v6_daddr, sk->sk_dport);
#endif
	return inet_ehashfn(sock_net(sk),
			    sk->sk_rcv_saddr, sk->sk_num,
			    sk->sk_daddr, sk->sk_dport);
}

/*
 * Allocate and initialize a new local port bind bucket.
 * The bindhash mutex for snum's hash chain must be held here.
 */
struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
						 struct net *net,
						 struct inet_bind_hashbucket *head,
						 const unsigned short snum)
{
	/* GFP_ATOMIC: called with the bucket spinlock held. */
	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);

	if (tb) {
		write_pnet(&tb->ib_net, net);
		tb->port      = snum;
		tb->fastreuse = 0;
		tb->fastreuseport = 0;
		tb->num_owners = 0;
		INIT_HLIST_HEAD(&tb->owners);
		/* Link into the chain while the caller still holds the lock. */
		hlist_add_head(&tb->node, &head->chain);
	}
	return tb;	/* NULL on allocation failure */
}

/*
 * Caller must hold hashbucket lock for this tb with local BH disabled
 */
void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
{
	/* Only free the bucket once no socket owns the port anymore. */
	if (hlist_empty(&tb->owners)) {
		__hlist_del(&tb->node);
		kmem_cache_free(cachep, tb);
	}
}

/* Bind @sk to @tb's local port @snum: record the port in the socket,
 * add the socket to the bucket's owner list and remember the bucket in
 * icsk_bind_hash so __inet_put_port() can find it later.
 * Caller holds the bhash bucket lock.
 */
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
		    const unsigned short snum)
{
	inet_sk(sk)->inet_num = snum;
	sk_add_bind_node(sk, &tb->owners);
	tb->num_owners++;
	inet_csk(sk)->icsk_bind_hash = tb;
}

/*
 * Get rid of any references to a local port held by the given sock.
 * Caller must have local BHs disabled (see inet_put_port()).
 */
static void __inet_put_port(struct sock *sk)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
			hashinfo->bhash_size);
	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
	struct inet_bind_bucket *tb;

	spin_lock(&head->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	__sk_del_bind_node(sk);
	tb->num_owners--;
	inet_csk(sk)->icsk_bind_hash = NULL;
	inet_sk(sk)->inet_num = 0;
	/* Frees the bucket if we were its last owner. */
	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
	spin_unlock(&head->lock);
}

/* Public wrapper: release @sk's local port with BHs disabled, as the
 * bind-hash locking rules require.
 */
void inet_put_port(struct sock *sk)
{
	local_bh_disable();
	__inet_put_port(sk);
	local_bh_enable();
}
EXPORT_SYMBOL(inet_put_port);

/* Make @child (a freshly accepted socket) an owner of the same local
 * port as listener @sk.  Normally the listener's bind bucket is reused;
 * with tproxy the child's port may differ, in which case a matching
 * bucket is looked up or created.
 * Returns 0 on success or -ENOMEM if a new bucket cannot be allocated.
 */
int __inet_inherit_port(struct sock *sk, struct sock *child)
{
	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
	unsigned short port = inet_sk(child)->inet_num;
	const int bhash = inet_bhashfn(sock_net(sk), port,
			table->bhash_size);
	struct inet_bind_hashbucket *head = &table->bhash[bhash];
	struct inet_bind_bucket *tb;

	spin_lock(&head->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	if (tb->port != port) {
		/* NOTE: using tproxy and redirecting skbs to a proxy
		 * on a different listener port breaks the assumption
		 * that the listener socket's icsk_bind_hash is the same
		 * as that of the child socket. We have to look up or
		 * create a new bind bucket for the child here. */
		inet_bind_bucket_for_each(tb, &head->chain) {
			if (net_eq(ib_net(tb), sock_net(sk)) &&
			    tb->port == port)
				break;
		}
		if (!tb) {
			tb = inet_bind_bucket_create(table->bind_bucket_cachep,
						     sock_net(sk), head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				return -ENOMEM;
			}
		}
	}
	inet_bind_hash(child, tb, port);
	spin_unlock(&head->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);

167 168 169 170 171 172 173
static inline int compute_score(struct sock *sk, struct net *net,
				const unsigned short hnum, const __be32 daddr,
				const int dif)
{
	int score = -1;
	struct inet_sock *inet = inet_sk(sk);

E
Eric Dumazet 已提交
174
	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
175
			!ipv6_only_sock(sk)) {
E
Eric Dumazet 已提交
176
		__be32 rcv_saddr = inet->inet_rcv_saddr;
177
		score = sk->sk_family == PF_INET ? 2 : 1;
178 179 180
		if (rcv_saddr) {
			if (rcv_saddr != daddr)
				return -1;
181
			score += 4;
182 183 184 185
		}
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)
				return -1;
186
			score += 4;
187 188 189 190 191
		}
	}
	return score;
}

/*
 * Don't inline this cruft. Here are some nice properties to exploit here. The
 * BSD API does not allow a listening sock to specify the remote port nor the
 * remote address for the connection. So always assume those are both
 * wildcarded during the search since they can never be otherwise.
 */


struct sock *__inet_lookup_listener(struct net *net,
				    struct inet_hashinfo *hashinfo,
				    const __be32 saddr, __be16 sport,
				    const __be32 daddr, const unsigned short hnum,
				    const int dif)
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
	unsigned int hash = inet_lhashfn(net, hnum);
	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
	int score, hiscore, matches = 0, reuseport = 0;
	u32 phash = 0;

	rcu_read_lock();
begin:
	result = NULL;
	hiscore = 0;
	sk_nulls_for_each_rcu(sk, node, &ilb->head) {
		score = compute_score(sk, net, hnum, daddr, dif);
		if (score > hiscore) {
			/* New best match so far. */
			result = sk;
			hiscore = score;
			reuseport = sk->sk_reuseport;
			if (reuseport) {
				phash = inet_ehashfn(net, daddr, hnum,
						     saddr, sport);
				matches = 1;
			}
		} else if (score == hiscore && reuseport) {
			/* Equal-score SO_REUSEPORT sockets: pick one
			 * pseudo-randomly but deterministically per flow.
			 */
			matches++;
			if (reciprocal_scale(phash, matches) == 0)
				result = sk;
			phash = next_pseudo_random32(phash);
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
		goto begin;
	if (result) {
		/* Take a reference; a zero refcount means the socket is
		 * being freed, so treat it as no result.
		 */
		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
			result = NULL;
		else if (unlikely(compute_score(result, net, hnum, daddr,
				  dif) < hiscore)) {
			/* Socket changed under us; drop ref and retry. */
			sock_put(result);
			goto begin;
		}
	}
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);

/* All sockets share common refcount, but have different destructors */
void sock_gen_put(struct sock *sk)
{
	if (!atomic_dec_and_test(&sk->sk_refcnt))
		return;

	/* Last reference dropped: dispatch to the destructor matching
	 * the socket's state (timewait / request / full socket).
	 */
	if (sk->sk_state == TCP_TIME_WAIT)
		inet_twsk_free(inet_twsk(sk));
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		reqsk_free(inet_reqsk(sk));
	else
		sk_free(sk);
}
EXPORT_SYMBOL_GPL(sock_gen_put);

/* skb destructor used by early demux: drops the socket reference that
 * was attached to the skb.
 */
void sock_edemux(struct sk_buff *skb)
{
	sock_gen_put(skb->sk);
}
EXPORT_SYMBOL(sock_edemux);

/* Look up an established (or timewait) socket by its 4-tuple.
 * Lockless: runs under RCU; on success a reference is taken on the
 * returned socket and the caller must release it (sock_gen_put()).
 */
struct sock *__inet_lookup_established(struct net *net,
				  struct inet_hashinfo *hashinfo,
				  const __be32 saddr, const __be16 sport,
				  const __be32 daddr, const u16 hnum,
				  const int dif)
{
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
	struct sock *sk;
	const struct hlist_nulls_node *node;
	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
	unsigned int slot = hash & hashinfo->ehash_mask;
	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];

	rcu_read_lock();
begin:
	sk_nulls_for_each_rcu(sk, node, &head->chain) {
		/* Cheap hash filter before the full tuple comparison. */
		if (sk->sk_hash != hash)
			continue;
		if (likely(INET_MATCH(sk, net, acookie,
				      saddr, daddr, ports, dif))) {
			/* refcount of zero means the socket is being freed */
			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
				goto out;
			/* Re-check under the reference: the slot may have
			 * been reused for another connection meanwhile.
			 */
			if (unlikely(!INET_MATCH(sk, net, acookie,
						 saddr, daddr, ports, dif))) {
				sock_gen_put(sk);
				goto begin;
			}
			goto found;
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != slot)
		goto begin;
out:
	sk = NULL;
found:
	rcu_read_unlock();
	return sk;
}
EXPORT_SYMBOL_GPL(__inet_lookup_established);

/* called with local bh disabled
 *
 * Check that @sk's 4-tuple (with local port @lport) is unique in the
 * established hash; if unique, hash @sk into the chain under the same
 * lock.  A conflicting TIME_WAIT socket may be recycled (twsk_unique);
 * it is then unhashed and either returned via *@twp or released here.
 * Returns 0 when the tuple is unique, -EADDRNOTAVAIL otherwise.
 */
static int __inet_check_established(struct inet_timewait_death_row *death_row,
				    struct sock *sk, __u16 lport,
				    struct inet_timewait_sock **twp)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_sock *inet = inet_sk(sk);
	/* Note the swap: for an outgoing connection our local address is
	 * the remote side's daddr and vice versa.
	 */
	__be32 daddr = inet->inet_rcv_saddr;
	__be32 saddr = inet->inet_daddr;
	int dif = sk->sk_bound_dev_if;
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
	struct net *net = sock_net(sk);
	unsigned int hash = inet_ehashfn(net, daddr, lport,
					 saddr, inet->inet_dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
	struct sock *sk2;
	const struct hlist_nulls_node *node;
	struct inet_timewait_sock *tw = NULL;
	int twrefcnt = 0;

	spin_lock(lock);

	sk_nulls_for_each(sk2, node, &head->chain) {
		if (sk2->sk_hash != hash)
			continue;

		if (likely(INET_MATCH(sk2, net, acookie,
					 saddr, daddr, ports, dif))) {
			if (sk2->sk_state == TCP_TIME_WAIT) {
				tw = inet_twsk(sk2);
				/* TIME_WAIT entry may be safely recycled. */
				if (twsk_unique(sk, sk2, twp))
					break;
			}
			goto not_unique;
		}
	}

	/* Must record num and sport now. Otherwise we will see
	 * in hash table socket with a funny identity.
	 */
	inet->inet_num = lport;
	inet->inet_sport = htons(lport);
	sk->sk_hash = hash;
	WARN_ON(!sk_unhashed(sk));
	__sk_nulls_add_node_rcu(sk, &head->chain);
	if (tw) {
		/* Displace the recycled timewait socket from the chain. */
		twrefcnt = inet_twsk_unhash(tw);
		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
	}
	spin_unlock(lock);
	if (twrefcnt)
		inet_twsk_put(tw);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

	if (twp) {
		/* Hand ownership of the timewait socket to the caller. */
		*twp = tw;
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	spin_unlock(lock);
	return -EADDRNOTAVAIL;
}

/* Per-destination offset into the ephemeral port range, derived from
 * the connection addresses/port so different destinations start their
 * port search at different points.
 */
static u32 inet_sk_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);

	return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
					  inet->inet_daddr,
					  inet->inet_dport);
}

/* Insert a non-listening socket into the established hash table.
 * If @tw is a timewait socket being replaced on the same chain, it is
 * unhashed under the same lock; the return value is the number of
 * timewait references the caller must drop.
 */
int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	struct hlist_nulls_head *list;
	struct inet_ehash_bucket *head;
	spinlock_t *lock;
	int twrefcnt = 0;

	WARN_ON(!sk_unhashed(sk));

	sk->sk_hash = sk_ehashfn(sk);
	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
	list = &head->chain;
	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

	spin_lock(lock);
	__sk_nulls_add_node_rcu(sk, list);
	if (tw) {
		/* Replacement must live on the same chain as the twsk. */
		WARN_ON(sk->sk_hash != tw->tw_hash);
		twrefcnt = inet_twsk_unhash(tw);
	}
	spin_unlock(lock);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	return twrefcnt;
}
EXPORT_SYMBOL_GPL(__inet_hash_nolisten);

/* Hash @sk: listeners go into the listening hash, everything else into
 * the established hash via __inet_hash_nolisten().
 * Returns the twrefcnt from the nolisten path, 0 for listeners.
 */
int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	struct inet_listen_hashbucket *ilb;

	if (sk->sk_state != TCP_LISTEN)
		return __inet_hash_nolisten(sk, tw);

	WARN_ON(!sk_unhashed(sk));
	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];

	spin_lock(&ilb->lock);
	__sk_nulls_add_node_rcu(sk, &ilb->head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	spin_unlock(&ilb->lock);
	return 0;
}
EXPORT_SYMBOL(__inet_hash);

/* Public entry point: hash @sk with BHs disabled.  A closed socket is
 * never (re)inserted into the tables.
 */
void inet_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		local_bh_disable();
		__inet_hash(sk, NULL);
		local_bh_enable();
	}
}
EXPORT_SYMBOL_GPL(inet_hash);

/* Remove @sk from whichever hash table it lives in (listening or
 * established); no-op if it is not hashed.
 */
void inet_unhash(struct sock *sk)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	spinlock_t *lock;
	int done;

	if (sk_unhashed(sk))
		return;

	/* Pick the lock matching the table the socket is in. */
	if (sk->sk_state == TCP_LISTEN)
		lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
	else
		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

	spin_lock_bh(lock);
	done = __sk_nulls_del_node_init_rcu(sk);
	if (done)
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	spin_unlock_bh(lock);
}
EXPORT_SYMBOL_GPL(inet_unhash);

/* Bind @sk to an ephemeral (or pre-bound) local port and hash it into
 * the established table, guaranteeing 4-tuple uniqueness via
 * @check_established.  When no port is pre-bound (snum == 0), the
 * ephemeral range is scanned starting at hint + @port_offset.
 * Returns 0 on success, -EADDRNOTAVAIL when no usable port is found.
 */
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
		struct sock *sk, u32 port_offset,
		int (*check_established)(struct inet_timewait_death_row *,
			struct sock *, __u16, struct inet_timewait_sock **))
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	const unsigned short snum = inet_sk(sk)->inet_num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;
	struct net *net = sock_net(sk);
	int twrefcnt = 1;

	if (!snum) {
		int i, remaining, low, high, port;
		static u32 hint;	/* shared search cursor across calls */
		u32 offset = hint + port_offset;
		struct inet_timewait_sock *tw = NULL;

		inet_get_local_port_range(net, &low, &high);
		remaining = (high - low) + 1;

		/* By starting with offset being an even number,
		 * we tend to leave about 50% of ports for other uses,
		 * like bind(0).
		 */
		offset &= ~1;

		local_bh_disable();
		for (i = 0; i < remaining; i++) {
			port = low + (i + offset) % remaining;
			if (inet_is_local_reserved_port(net, port))
				continue;
			head = &hinfo->bhash[inet_bhashfn(net, port,
					hinfo->bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, &head->chain) {
				if (net_eq(ib_net(tb), net) &&
				    tb->port == port) {
					/* fastreuse >= 0 means bind() also
					 * uses this bucket; skip it.
					 */
					if (tb->fastreuse >= 0 ||
					    tb->fastreuseport >= 0)
						goto next_port;
					WARN_ON(hlist_empty(&tb->owners));
					if (!check_established(death_row, sk,
								port, &tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
					net, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			/* Mark as connect-only so bind() won't share it. */
			tb->fastreuse = -1;
			tb->fastreuseport = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		/* Advance the shared cursor past the port we used. */
		hint += (i + 2) & ~1;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->inet_sport = htons(port);
			twrefcnt += __inet_hash_nolisten(sk, tw);
		}
		if (tw)
			twrefcnt += inet_twsk_bind_unhash(tw, hinfo);
		spin_unlock(&head->lock);

		if (tw) {
			inet_twsk_deschedule(tw);
			/* Drop every reference accumulated above. */
			while (twrefcnt) {
				twrefcnt--;
				inet_twsk_put(tw);
			}
		}

		ret = 0;
		goto out;
	}

	head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
	tb  = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);
	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		/* Sole owner of the port: no 4-tuple conflict possible. */
		__inet_hash_nolisten(sk, NULL);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		/* Deliberately plain unlock: BHs stay disabled until the
		 * local_bh_enable() at out:.
		 */
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = check_established(death_row, sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}

/*
 * Bind a port for a connect operation and hash it.
 */
int inet_hash_connect(struct inet_timewait_death_row *death_row,
		      struct sock *sk)
{
	u32 port_offset = 0;

	/* Only randomize the search start when picking an ephemeral port. */
	if (!inet_sk(sk)->inet_num)
		port_offset = inet_sk_port_offset(sk);
	return __inet_hash_connect(death_row, sk, port_offset,
				   __inet_check_established);
}
EXPORT_SYMBOL_GPL(inet_hash_connect);

void inet_hashinfo_init(struct inet_hashinfo *h)
{
	int i;

617
	for (i = 0; i < INET_LHTABLE_SIZE; i++) {
618
		spin_lock_init(&h->listening_hash[i].lock);
619 620 621
		INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
				      i + LISTENING_NULLS_BASE);
		}
622 623
}
EXPORT_SYMBOL_GPL(inet_hashinfo_init);
624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653

/* Allocate the spinlock array protecting the established hash chains.
 * The lock count scales with possible CPUs, is rounded to a power of
 * two, and is capped at the number of hash buckets; falls back to
 * vmalloc when kmalloc fails.  Returns 0 on success, -ENOMEM on
 * allocation failure.
 */
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
	unsigned int i, nblocks = 1;

	/* sizeof(spinlock_t) can be 0 when spinlocks compile away; then a
	 * single dummy "lock" and a zero mask suffice.
	 */
	if (sizeof(spinlock_t) != 0) {
		/* allocate 2 cache lines or at least one spinlock per cpu */
		nblocks = max_t(unsigned int,
				2 * L1_CACHE_BYTES / sizeof(spinlock_t),
				1);
		nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());

		/* no more locks than number of hash buckets */
		nblocks = min(nblocks, hashinfo->ehash_mask + 1);

		hashinfo->ehash_locks =	kmalloc_array(nblocks, sizeof(spinlock_t),
						      GFP_KERNEL | __GFP_NOWARN);
		if (!hashinfo->ehash_locks)
			hashinfo->ehash_locks = vmalloc(nblocks * sizeof(spinlock_t));

		if (!hashinfo->ehash_locks)
			return -ENOMEM;

		for (i = 0; i < nblocks; i++)
			spin_lock_init(&hashinfo->ehash_locks[i]);
	}
	/* Power-of-two count lets lookups index with a simple mask. */
	hashinfo->ehash_locks_mask = nblocks - 1;
	return 0;
}
EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);