/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic INET transport hashtables
 *
 * Authors:	Lotsa people, from code originally in tcp
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>

#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>

static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
			const __u16 lport, const __be32 faddr,
			const __be16 fport)
{
	static u32 inet_ehash_secret __read_mostly;

	net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));

	return __inet_ehashfn(laddr, lport, faddr, fport,
			      inet_ehash_secret + net_hash_mix(net));
}
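
/*
 * The hashing secret is seeded lazily, on first use, by
 * net_get_random_once(), and net_hash_mix(net) salts the result so each
 * network namespace lays out its chains differently. A sketch of the
 * effective computation (the real mixing lives in __inet_ehashfn()):
 *
 *	jhash(laddr, faddr, (lport << 16) | fport, secret + netns_salt)
 */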

/* This function handles inet_sock, but also timewait and request sockets
 * for IPv4/IPv6.
 */
u32 sk_ehashfn(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6 &&
	    !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		return inet6_ehashfn(sock_net(sk),
				     &sk->sk_v6_rcv_saddr, sk->sk_num,
				     &sk->sk_v6_daddr, sk->sk_dport);
#endif
	return inet_ehashfn(sock_net(sk),
			    sk->sk_rcv_saddr, sk->sk_num,
			    sk->sk_daddr, sk->sk_dport);
}
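
/*
 * Note that an AF_INET6 socket with a v4-mapped peer (::ffff:a.b.c.d)
 * deliberately falls through to inet_ehashfn() above, so it hashes into
 * the same chain as a plain IPv4 socket with the same 4-tuple and both
 * lookup paths agree on where to find it.
 */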

/*
 * Allocate and initialize a new local port bind bucket.
 * The bindhash mutex for snum's hash chain must be held here.
 */
struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
						 struct net *net,
						 struct inet_bind_hashbucket *head,
						 const unsigned short snum)
{
	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);

	if (tb) {
		write_pnet(&tb->ib_net, net);
		tb->port      = snum;
		tb->fastreuse = 0;
		tb->fastreuseport = 0;
		tb->num_owners = 0;
		INIT_HLIST_HEAD(&tb->owners);
		hlist_add_head(&tb->node, &head->chain);
	}
	return tb;
}
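
/*
 * GFP_ATOMIC above: callers create bind buckets while already holding
 * head->lock (the bhash chain spinlock), so a sleeping allocation is not
 * an option at this point.
 */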

/*
 * Caller must hold hashbucket lock for this tb with local BH disabled
 */
void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
{
	if (hlist_empty(&tb->owners)) {
		__hlist_del(&tb->node);
		kmem_cache_free(cachep, tb);
	}
}

void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
		    const unsigned short snum)
{
	inet_sk(sk)->inet_num = snum;
	sk_add_bind_node(sk, &tb->owners);
	tb->num_owners++;
	inet_csk(sk)->icsk_bind_hash = tb;
}

/*
 * Get rid of any references to a local port held by the given sock.
 */
static void __inet_put_port(struct sock *sk)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
			hashinfo->bhash_size);
	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
	struct inet_bind_bucket *tb;

	spin_lock(&head->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	__sk_del_bind_node(sk);
	tb->num_owners--;
	inet_csk(sk)->icsk_bind_hash = NULL;
	inet_sk(sk)->inet_num = 0;
	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
	spin_unlock(&head->lock);
}

void inet_put_port(struct sock *sk)
{
	local_bh_disable();
	__inet_put_port(sk);
	local_bh_enable();
}
EXPORT_SYMBOL(inet_put_port);

int __inet_inherit_port(struct sock *sk, struct sock *child)
{
	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
	unsigned short port = inet_sk(child)->inet_num;
	const int bhash = inet_bhashfn(sock_net(sk), port,
			table->bhash_size);
	struct inet_bind_hashbucket *head = &table->bhash[bhash];
	struct inet_bind_bucket *tb;

	spin_lock(&head->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	if (tb->port != port) {
		/* NOTE: using tproxy and redirecting skbs to a proxy
		 * on a different listener port breaks the assumption
		 * that the listener socket's icsk_bind_hash is the same
		 * as that of the child socket. We have to look up or
		 * create a new bind bucket for the child here. */
		inet_bind_bucket_for_each(tb, &head->chain) {
			if (net_eq(ib_net(tb), sock_net(sk)) &&
			    tb->port == port)
				break;
		}
		if (!tb) {
			tb = inet_bind_bucket_create(table->bind_bucket_cachep,
						     sock_net(sk), head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				return -ENOMEM;
			}
		}
	}
	inet_bind_hash(child, tb, port);
	spin_unlock(&head->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);

static inline int compute_score(struct sock *sk, struct net *net,
				const unsigned short hnum, const __be32 daddr,
				const int dif)
{
	int score = -1;
	struct inet_sock *inet = inet_sk(sk);

	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
			!ipv6_only_sock(sk)) {
		__be32 rcv_saddr = inet->inet_rcv_saddr;
		score = sk->sk_family == PF_INET ? 2 : 1;
		if (rcv_saddr) {
			if (rcv_saddr != daddr)
				return -1;
			score += 4;
		}
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)
				return -1;
			score += 4;
		}
	}
	return score;
}
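
/*
 * Worked example of the scoring above: an AF_INET listener starts at 2
 * (an AF_INET6 socket that can accept v4 starts at 1), +4 for an exact
 * bound address, +4 for a matching bound device. For a packet sent to
 * 192.0.2.1 arriving on eth0:
 *
 *	bind 0.0.0.0:80                  -> score 2
 *	bind 192.0.2.1:80                -> score 6
 *	bind 192.0.2.1:80, dev eth0      -> score 10
 *	bind 198.51.100.1:80             -> -1 (can never match)
 */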

/*
 * Don't inline this cruft. There are some nice properties to exploit
 * here. The BSD API does not allow a listening sock to specify the
 * remote port nor the remote address for the connection. So always
 * assume those are both wildcarded during the search since they can
 * never be otherwise.
 */

struct sock *__inet_lookup_listener(struct net *net,
				    struct inet_hashinfo *hashinfo,
				    const __be32 saddr, __be16 sport,
				    const __be32 daddr, const unsigned short hnum,
				    const int dif)
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
	unsigned int hash = inet_lhashfn(net, hnum);
	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
	int score, hiscore, matches = 0, reuseport = 0;
	u32 phash = 0;

	rcu_read_lock();
begin:
	result = NULL;
	hiscore = 0;
	sk_nulls_for_each_rcu(sk, node, &ilb->head) {
		score = compute_score(sk, net, hnum, daddr, dif);
		if (score > hiscore) {
			result = sk;
			hiscore = score;
			reuseport = sk->sk_reuseport;
			if (reuseport) {
				phash = inet_ehashfn(net, daddr, hnum,
						     saddr, sport);
				matches = 1;
			}
		} else if (score == hiscore && reuseport) {
			matches++;
			if (reciprocal_scale(phash, matches) == 0)
				result = sk;
			phash = next_pseudo_random32(phash);
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
		goto begin;
	if (result) {
		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
			result = NULL;
		else if (unlikely(compute_score(result, net, hnum, daddr,
				  dif) < hiscore)) {
			sock_put(result);
			goto begin;
		}
	}
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
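
/*
 * Caller sketch (assumed; in-tree users normally arrive here through the
 * inet_lookup_listener()/__inet_lookup() wrappers, e.g. from the TCP
 * input path via __inet_lookup_skb()):
 *
 *	sk = __inet_lookup_listener(net, &tcp_hashinfo,
 *				    iph->saddr, th->source,
 *				    iph->daddr, ntohs(th->dest),
 *				    inet_iif(skb));
 *	if (sk) {
 *		...
 *		sock_put(sk);	// drop the reference taken for us
 *	}
 */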

/* All sockets share common refcount, but have different destructors */
void sock_gen_put(struct sock *sk)
{
	if (!atomic_dec_and_test(&sk->sk_refcnt))
		return;

	if (sk->sk_state == TCP_TIME_WAIT)
		inet_twsk_free(inet_twsk(sk));
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		reqsk_free(inet_reqsk(sk));
	else
		sk_free(sk);
}
EXPORT_SYMBOL_GPL(sock_gen_put);
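
/*
 * Full sockets, TIME_WAIT minisockets and SYN_RECV request sockets share
 * the leading sock_common (and thus sk_refcnt and sk_state), so a single
 * refcount helper can dispatch to the right destructor. This is what
 * allows lookup code to drop whatever flavor of socket it found.
 */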

void sock_edemux(struct sk_buff *skb)
{
	sock_gen_put(skb->sk);
}
EXPORT_SYMBOL(sock_edemux);

struct sock *__inet_lookup_established(struct net *net,
				  struct inet_hashinfo *hashinfo,
				  const __be32 saddr, const __be16 sport,
				  const __be32 daddr, const u16 hnum,
				  const int dif)
{
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
	struct sock *sk;
	const struct hlist_nulls_node *node;
	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
	unsigned int slot = hash & hashinfo->ehash_mask;
	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];

	rcu_read_lock();
begin:
	sk_nulls_for_each_rcu(sk, node, &head->chain) {
		if (sk->sk_hash != hash)
			continue;
		if (likely(INET_MATCH(sk, net, acookie,
				      saddr, daddr, ports, dif))) {
			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
				goto out;
			if (unlikely(!INET_MATCH(sk, net, acookie,
						 saddr, daddr, ports, dif))) {
				sock_gen_put(sk);
				goto begin;
			}
			goto found;
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != slot)
		goto begin;
out:
	sk = NULL;
found:
	rcu_read_unlock();
	return sk;
}
EXPORT_SYMBOL_GPL(__inet_lookup_established);
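
/*
 * The match/refcount/match dance above is the usual RCU lookup pattern:
 * the first INET_MATCH runs without any reference held,
 * atomic_inc_not_zero() then fails if the socket is already being freed,
 * and the second INET_MATCH catches a slot that was freed and reused for
 * a different connection (SLAB_DESTROY_BY_RCU) between the two steps.
 */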

/* called with local bh disabled */
static int __inet_check_established(struct inet_timewait_death_row *death_row,
				    struct sock *sk, __u16 lport,
				    struct inet_timewait_sock **twp)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_sock *inet = inet_sk(sk);
	__be32 daddr = inet->inet_rcv_saddr;
	__be32 saddr = inet->inet_daddr;
	int dif = sk->sk_bound_dev_if;
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
	struct net *net = sock_net(sk);
	unsigned int hash = inet_ehashfn(net, daddr, lport,
					 saddr, inet->inet_dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
	struct sock *sk2;
	const struct hlist_nulls_node *node;
	struct inet_timewait_sock *tw = NULL;
	int twrefcnt = 0;

	spin_lock(lock);

	sk_nulls_for_each(sk2, node, &head->chain) {
		if (sk2->sk_hash != hash)
			continue;

		if (likely(INET_MATCH(sk2, net, acookie,
				      saddr, daddr, ports, dif))) {
			if (sk2->sk_state == TCP_TIME_WAIT) {
				tw = inet_twsk(sk2);
				if (twsk_unique(sk, sk2, twp))
					break;
			}
			goto not_unique;
		}
	}

	/* Must record num and sport now. Otherwise we will see in the
	 * hash table a socket with a funny identity.
	 */
	inet->inet_num = lport;
	inet->inet_sport = htons(lport);
	sk->sk_hash = hash;
	WARN_ON(!sk_unhashed(sk));
	__sk_nulls_add_node_rcu(sk, &head->chain);
	if (tw) {
		twrefcnt = inet_twsk_unhash(tw);
		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
	}
	spin_unlock(lock);
	if (twrefcnt)
		inet_twsk_put(tw);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

	if (twp) {
		*twp = tw;
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	spin_unlock(lock);
	return -EADDRNOTAVAIL;
}
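
/*
 * On success the socket has been inserted into the established table
 * under its new identity. If a TIME_WAIT socket occupied the 4-tuple and
 * twsk_unique() allowed recycling it, it is either handed back through
 * *twp for the caller to finish killing, or descheduled and released
 * right here when no *twp was supplied.
 */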

static inline u32 inet_sk_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);

	return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
					  inet->inet_daddr,
					  inet->inet_dport);
}

int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	struct hlist_nulls_head *list;
	struct inet_ehash_bucket *head;
	spinlock_t *lock;
	int twrefcnt = 0;

	WARN_ON(!sk_unhashed(sk));

	sk->sk_hash = sk_ehashfn(sk);
	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
	list = &head->chain;
	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

	spin_lock(lock);
	__sk_nulls_add_node_rcu(sk, list);
	if (tw) {
		WARN_ON(sk->sk_hash != tw->tw_hash);
		twrefcnt = inet_twsk_unhash(tw);
	}
	spin_unlock(lock);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	return twrefcnt;
}
EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
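
/*
 * The value returned above is the number of references to @tw the caller
 * still has to drop: inet_twsk_unhash() cannot call inet_twsk_put()
 * under the chain lock, so __inet_hash_connect() accumulates these
 * counts and runs its inet_twsk_put() loop once the locks are released.
 */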

int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	struct inet_listen_hashbucket *ilb;

	if (sk->sk_state != TCP_LISTEN)
		return __inet_hash_nolisten(sk, tw);

	WARN_ON(!sk_unhashed(sk));
	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];

	spin_lock(&ilb->lock);
	__sk_nulls_add_node_rcu(sk, &ilb->head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	spin_unlock(&ilb->lock);
	return 0;
}
EXPORT_SYMBOL(__inet_hash);

void inet_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		local_bh_disable();
		__inet_hash(sk, NULL);
		local_bh_enable();
	}
}
EXPORT_SYMBOL_GPL(inet_hash);

void inet_unhash(struct sock *sk)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	spinlock_t *lock;
	int done;

	if (sk_unhashed(sk))
		return;

	if (sk->sk_state == TCP_LISTEN)
		lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
	else
		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

	spin_lock_bh(lock);
	done = __sk_nulls_del_node_init_rcu(sk);
	if (done)
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	spin_unlock_bh(lock);
}
EXPORT_SYMBOL_GPL(inet_unhash);

int __inet_hash_connect(struct inet_timewait_death_row *death_row,
		struct sock *sk, u32 port_offset,
		int (*check_established)(struct inet_timewait_death_row *,
			struct sock *, __u16, struct inet_timewait_sock **))
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	const unsigned short snum = inet_sk(sk)->inet_num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;
	struct net *net = sock_net(sk);
	int twrefcnt = 1;

	if (!snum) {
		int i, remaining, low, high, port;
		static u32 hint;
		u32 offset = hint + port_offset;
		struct inet_timewait_sock *tw = NULL;

		inet_get_local_port_range(net, &low, &high);
		remaining = (high - low) + 1;

		/* By starting with offset being an even number,
		 * we tend to leave about 50% of ports for other uses,
		 * like bind(0).
		 */
		offset &= ~1;

		local_bh_disable();
		for (i = 0; i < remaining; i++) {
			port = low + (i + offset) % remaining;
			if (inet_is_local_reserved_port(net, port))
				continue;
			head = &hinfo->bhash[inet_bhashfn(net, port,
					hinfo->bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, &head->chain) {
				if (net_eq(ib_net(tb), net) &&
				    tb->port == port) {
					if (tb->fastreuse >= 0 ||
					    tb->fastreuseport >= 0)
						goto next_port;
					WARN_ON(hlist_empty(&tb->owners));
					if (!check_established(death_row, sk,
							       port, &tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
					net, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			tb->fastreuseport = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += (i + 2) & ~1;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->inet_sport = htons(port);
			twrefcnt += __inet_hash_nolisten(sk, tw);
		}
		if (tw)
			twrefcnt += inet_twsk_bind_unhash(tw, hinfo);
		spin_unlock(&head->lock);

		if (tw) {
			inet_twsk_deschedule(tw);
			while (twrefcnt) {
				twrefcnt--;
				inet_twsk_put(tw);
			}
		}

		ret = 0;
		goto out;
	}

	head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
	tb  = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);
	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__inet_hash_nolisten(sk, NULL);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = check_established(death_row, sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
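
/*
 * Arithmetic note on the even-offset trick above: hint and offset are
 * both kept even, and most searches succeed on their first candidate
 * port, so connect() tends to consume ports where (i + offset) is even,
 * i.e. every other port in [low, high]. That leaves roughly half of the
 * range to explicit bind(0) users, which allocate through a different
 * path (inet_csk_get_port()).
 */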

/*
 * Bind a port for a connect operation and hash it.
 */
int inet_hash_connect(struct inet_timewait_death_row *death_row,
		      struct sock *sk)
{
	return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
				   __inet_check_established);
}
EXPORT_SYMBOL_GPL(inet_hash_connect);

void inet_hashinfo_init(struct inet_hashinfo *h)
{
	int i;

	for (i = 0; i < INET_LHTABLE_SIZE; i++) {
		spin_lock_init(&h->listening_hash[i].lock);
		INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
				      i + LISTENING_NULLS_BASE);
	}
}
EXPORT_SYMBOL_GPL(inet_hashinfo_init);
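
/*
 * Giving bucket i the distinct nulls value i + LISTENING_NULLS_BASE is
 * what lets __inet_lookup_listener() check, via get_nulls_value(), that
 * its lockless walk ended on the chain it started from, and restart the
 * lookup when the final marker belongs to some other bucket.
 */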

int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
	unsigned int i, nblocks = 1;

	if (sizeof(spinlock_t) != 0) {
		/* allocate 2 cache lines or at least one spinlock per cpu */
		nblocks = max_t(unsigned int,
				2 * L1_CACHE_BYTES / sizeof(spinlock_t),
				1);
		nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());

		/* no more locks than number of hash buckets */
		nblocks = min(nblocks, hashinfo->ehash_mask + 1);

		hashinfo->ehash_locks =	kmalloc_array(nblocks, sizeof(spinlock_t),
						      GFP_KERNEL | __GFP_NOWARN);
		if (!hashinfo->ehash_locks)
			hashinfo->ehash_locks = vmalloc(nblocks * sizeof(spinlock_t));

		if (!hashinfo->ehash_locks)
			return -ENOMEM;

		for (i = 0; i < nblocks; i++)
			spin_lock_init(&hashinfo->ehash_locks[i]);
	}
	hashinfo->ehash_locks_mask = nblocks - 1;
	return 0;
}
EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);
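
/*
 * Sizing example, assuming 64-byte cache lines, 4-byte spinlocks and 8
 * possible CPUs: 2 * 64 / 4 = 32 locks minimum, times 8 CPUs = 256,
 * already a power of two, then capped at the number of ehash buckets.
 * The sizeof(spinlock_t) != 0 test covers builds (!CONFIG_SMP without
 * lock debugging) where spinlock_t is an empty struct, locking compiles
 * away and no array needs to be allocated.
 */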