/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic INET transport hashtables
 *
 * Authors:	Lotsa people, from code originally in tcp
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>

#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>

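/* Hash an IPv4 4-tuple into the established table. The boot-time random
 * secret (lazily initialized on first use) plus the per-netns mix keeps
 * chain placement unpredictable from outside the host.
 */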
static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
			const __u16 lport, const __be32 faddr,
			const __be16 fport)
{
	static u32 inet_ehash_secret __read_mostly;

	net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));

	return __inet_ehashfn(laddr, lport, faddr, fport,
			      inet_ehash_secret + net_hash_mix(net));
}

/* This function handles inet_sock, but also timewait and request sockets
 * for IPv4/IPv6.
 */
u32 sk_ehashfn(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6 &&
	    !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		return inet6_ehashfn(sock_net(sk),
				     &sk->sk_v6_rcv_saddr, sk->sk_num,
				     &sk->sk_v6_daddr, sk->sk_dport);
#endif
	return inet_ehashfn(sock_net(sk),
			    sk->sk_rcv_saddr, sk->sk_num,
			    sk->sk_daddr, sk->sk_dport);
}

/*
 * Allocate and initialize a new local port bind bucket.
 * The bindhash mutex for snum's hash chain must be held here.
 */
struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
						 struct net *net,
						 struct inet_bind_hashbucket *head,
						 const unsigned short snum)
{
	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);

	if (tb) {
		write_pnet(&tb->ib_net, net);
		tb->port      = snum;
		tb->fastreuse = 0;
		tb->fastreuseport = 0;
		tb->num_owners = 0;
		INIT_HLIST_HEAD(&tb->owners);
		hlist_add_head(&tb->node, &head->chain);
	}
	return tb;
}

/*
 * Caller must hold hashbucket lock for this tb with local BH disabled
 */
void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
{
	if (hlist_empty(&tb->owners)) {
		__hlist_del(&tb->node);
		kmem_cache_free(cachep, tb);
	}
}

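/* Tie @sk to bind bucket @tb for local port @snum: record the port,
 * link the socket onto the bucket's owner list, and remember the bucket
 * in icsk_bind_hash so inet_put_port() can release it later.
 */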
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
		    const unsigned short snum)
{
	inet_sk(sk)->inet_num = snum;
	sk_add_bind_node(sk, &tb->owners);
	tb->num_owners++;
	inet_csk(sk)->icsk_bind_hash = tb;
}

/*
 * Get rid of any references to a local port held by the given sock.
 */
static void __inet_put_port(struct sock *sk)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
			hashinfo->bhash_size);
	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
	struct inet_bind_bucket *tb;

	spin_lock(&head->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	__sk_del_bind_node(sk);
	tb->num_owners--;
	inet_csk(sk)->icsk_bind_hash = NULL;
	inet_sk(sk)->inet_num = 0;
	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
	spin_unlock(&head->lock);
}

void inet_put_port(struct sock *sk)
{
	local_bh_disable();
	__inet_put_port(sk);
	local_bh_enable();
}
EXPORT_SYMBOL(inet_put_port);

int __inet_inherit_port(struct sock *sk, struct sock *child)
{
	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
	unsigned short port = inet_sk(child)->inet_num;
	const int bhash = inet_bhashfn(sock_net(sk), port,
			table->bhash_size);
	struct inet_bind_hashbucket *head = &table->bhash[bhash];
	struct inet_bind_bucket *tb;

	spin_lock(&head->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	if (tb->port != port) {
		/* NOTE: using tproxy and redirecting skbs to a proxy
		 * on a different listener port breaks the assumption
		 * that the listener socket's icsk_bind_hash is the same
		 * as that of the child socket. We have to look up or
		 * create a new bind bucket for the child here. */
		inet_bind_bucket_for_each(tb, &head->chain) {
			if (net_eq(ib_net(tb), sock_net(sk)) &&
			    tb->port == port)
				break;
		}
		if (!tb) {
			tb = inet_bind_bucket_create(table->bind_bucket_cachep,
						     sock_net(sk), head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				return -ENOMEM;
			}
		}
	}
	inet_bind_hash(child, tb, port);
	spin_unlock(&head->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);

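/* Score a listening socket against an incoming (daddr, hnum, dif) tuple.
 * -1 means no match; otherwise a bound local address and a bound device
 * each add to the score, so the most specific listener wins.
 */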
static inline int compute_score(struct sock *sk, struct net *net,
				const unsigned short hnum, const __be32 daddr,
				const int dif)
{
	int score = -1;
	struct inet_sock *inet = inet_sk(sk);

	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
			!ipv6_only_sock(sk)) {
		__be32 rcv_saddr = inet->inet_rcv_saddr;
		score = sk->sk_family == PF_INET ? 2 : 1;
		if (rcv_saddr) {
			if (rcv_saddr != daddr)
				return -1;
			score += 4;
		}
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)
				return -1;
			score += 4;
		}
	}
	return score;
}

/*
 * Don't inline this cruft. There are some nice properties to exploit here. The
 * BSD API does not allow a listening sock to specify the remote port nor the
 * remote address for the connection. So always assume those are both
 * wildcarded during the search since they can never be otherwise.
 */

struct sock *__inet_lookup_listener(struct net *net,
				    struct inet_hashinfo *hashinfo,
				    const __be32 saddr, __be16 sport,
				    const __be32 daddr, const unsigned short hnum,
				    const int dif)
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
	unsigned int hash = inet_lhashfn(net, hnum);
	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
	int score, hiscore, matches = 0, reuseport = 0;
	u32 phash = 0;

	rcu_read_lock();
begin:
	result = NULL;
	hiscore = 0;
	sk_nulls_for_each_rcu(sk, node, &ilb->head) {
		score = compute_score(sk, net, hnum, daddr, dif);
		if (score > hiscore) {
			result = sk;
			hiscore = score;
			reuseport = sk->sk_reuseport;
			if (reuseport) {
				phash = inet_ehashfn(net, daddr, hnum,
						     saddr, sport);
				matches = 1;
			}
		} else if (score == hiscore && reuseport) {
			matches++;
			if (reciprocal_scale(phash, matches) == 0)
				result = sk;
			phash = next_pseudo_random32(phash);
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
		goto begin;
	if (result) {
		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
			result = NULL;
		else if (unlikely(compute_score(result, net, hnum, daddr,
				  dif) < hiscore)) {
			sock_put(result);
			goto begin;
		}
	}
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);

/* All sockets share common refcount, but have different destructors */
void sock_gen_put(struct sock *sk)
{
	if (!atomic_dec_and_test(&sk->sk_refcnt))
		return;

	if (sk->sk_state == TCP_TIME_WAIT)
		inet_twsk_free(inet_twsk(sk));
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		reqsk_free(inet_reqsk(sk));
	else
		sk_free(sk);
}
EXPORT_SYMBOL_GPL(sock_gen_put);

void sock_edemux(struct sk_buff *skb)
{
	sock_gen_put(skb->sk);
}
EXPORT_SYMBOL(sock_edemux);

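/* Look up a fully identified connection (established or timewait) by its
 * 4-tuple. Runs under RCU: a lockless nulls-chain walk plus a refcount
 * and re-match check keeps the result safe against concurrent rehashing.
 */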
struct sock *__inet_lookup_established(struct net *net,
				  struct inet_hashinfo *hashinfo,
				  const __be32 saddr, const __be16 sport,
				  const __be32 daddr, const u16 hnum,
				  const int dif)
{
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
	struct sock *sk;
	const struct hlist_nulls_node *node;
	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
	unsigned int slot = hash & hashinfo->ehash_mask;
	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];

	rcu_read_lock();
begin:
	sk_nulls_for_each_rcu(sk, node, &head->chain) {
		if (sk->sk_hash != hash)
			continue;
		if (likely(INET_MATCH(sk, net, acookie,
				      saddr, daddr, ports, dif))) {
			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
				goto out;
			if (unlikely(!INET_MATCH(sk, net, acookie,
						 saddr, daddr, ports, dif))) {
				sock_gen_put(sk);
				goto begin;
			}
			goto found;
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != slot)
		goto begin;
out:
	sk = NULL;
found:
	rcu_read_unlock();
	return sk;
}
EXPORT_SYMBOL_GPL(__inet_lookup_established);

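/* Check that the chosen (saddr, lport, daddr, dport) 4-tuple is not
 * already in use in the established table. A TIME_WAIT occupant may be
 * recycled if twsk_unique() allows it; on success the socket is inserted
 * under the same bucket lock, so the check and insert are atomic.
 */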
/* called with local bh disabled */
static int __inet_check_established(struct inet_timewait_death_row *death_row,
				    struct sock *sk, __u16 lport,
				    struct inet_timewait_sock **twp)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_sock *inet = inet_sk(sk);
	__be32 daddr = inet->inet_rcv_saddr;
	__be32 saddr = inet->inet_daddr;
	int dif = sk->sk_bound_dev_if;
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
	struct net *net = sock_net(sk);
	unsigned int hash = inet_ehashfn(net, daddr, lport,
					 saddr, inet->inet_dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
	struct sock *sk2;
	const struct hlist_nulls_node *node;
	struct inet_timewait_sock *tw = NULL;

	spin_lock(lock);

	sk_nulls_for_each(sk2, node, &head->chain) {
		if (sk2->sk_hash != hash)
			continue;

		if (likely(INET_MATCH(sk2, net, acookie,
					 saddr, daddr, ports, dif))) {
			if (sk2->sk_state == TCP_TIME_WAIT) {
				tw = inet_twsk(sk2);
				if (twsk_unique(sk, sk2, twp))
					break;
			}
			goto not_unique;
		}
	}

	/* Must record num and sport now. Otherwise we will see
	 * in the hash table a socket with a funny identity.
	 */
	inet->inet_num = lport;
	inet->inet_sport = htons(lport);
	sk->sk_hash = hash;
	WARN_ON(!sk_unhashed(sk));
	__sk_nulls_add_node_rcu(sk, &head->chain);
	if (tw) {
		sk_nulls_del_node_init_rcu((struct sock *)tw);
		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
	}
	spin_unlock(lock);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

	if (twp) {
		*twp = tw;
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw);
		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	spin_unlock(lock);
	return -EADDRNOTAVAIL;
}

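/* Per-destination starting offset for the ephemeral port search, derived
 * from a keyed hash of the connection endpoints so that connections to
 * different peers probe the port range in different orders.
 */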
static u32 inet_sk_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);

	return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
					  inet->inet_daddr,
					  inet->inet_dport);
}

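/* Insert a non-listening socket into the established hash. If @osk is
 * given (e.g. a timewait socket being replaced by the new connection),
 * it is unlinked from the same chain under the same lock, so lookups see
 * the swap atomically.
 */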
void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	struct hlist_nulls_head *list;
	struct inet_ehash_bucket *head;
	spinlock_t *lock;

	WARN_ON(!sk_unhashed(sk));

	sk->sk_hash = sk_ehashfn(sk);
	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
	list = &head->chain;
	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

	spin_lock(lock);
	__sk_nulls_add_node_rcu(sk, list);
	if (osk) {
		WARN_ON(sk->sk_hash != osk->sk_hash);
		sk_nulls_del_node_init_rcu(osk);
	}
	spin_unlock(lock);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
EXPORT_SYMBOL_GPL(__inet_hash_nolisten);

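/* Hash a socket according to its state: listeners go into the listening
 * hash under the bucket spinlock, everything else is delegated to
 * __inet_hash_nolisten().
 */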
void __inet_hash(struct sock *sk, struct sock *osk)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	struct inet_listen_hashbucket *ilb;

	if (sk->sk_state != TCP_LISTEN)
		return __inet_hash_nolisten(sk, osk);

	WARN_ON(!sk_unhashed(sk));
	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];

	spin_lock(&ilb->lock);
	__sk_nulls_add_node_rcu(sk, &ilb->head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	spin_unlock(&ilb->lock);
}
EXPORT_SYMBOL(__inet_hash);

void inet_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		local_bh_disable();
		__inet_hash(sk, NULL);
		local_bh_enable();
	}
}
EXPORT_SYMBOL_GPL(inet_hash);

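/* Remove @sk from whichever table (listening or established) it is on,
 * adjusting the protocol's inuse count only if the socket was actually
 * unlinked.
 */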
void inet_unhash(struct sock *sk)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	spinlock_t *lock;
	int done;

	if (sk_unhashed(sk))
		return;

	if (sk->sk_state == TCP_LISTEN)
		lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock;
	else
		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

	spin_lock_bh(lock);
	done = __sk_nulls_del_node_init_rcu(sk);
	if (done)
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	spin_unlock_bh(lock);
}
EXPORT_SYMBOL_GPL(inet_unhash);

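/* Bind @sk to a local port for an outgoing connection and hash it.
 *
 * If no port is bound yet (snum == 0), walk the local ephemeral range
 * starting at a per-destination offset, skipping buckets that bind()
 * users may share (fastreuse >= 0), until check_established() proves the
 * resulting 4-tuple unique; a conflicting TIME_WAIT socket may be
 * recycled on the spot. If a port is already bound, the sole owner is
 * re-hashed directly, otherwise the established check decides.
 *
 * The check_established callback lets address-family code reuse this
 * walk; IPv4 passes __inet_check_established via inet_hash_connect().
 */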
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
		struct sock *sk, u32 port_offset,
		int (*check_established)(struct inet_timewait_death_row *,
			struct sock *, __u16, struct inet_timewait_sock **))
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	const unsigned short snum = inet_sk(sk)->inet_num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;
	struct net *net = sock_net(sk);

	if (!snum) {
		int i, remaining, low, high, port;
		static u32 hint;
		u32 offset = hint + port_offset;
		struct inet_timewait_sock *tw = NULL;

		inet_get_local_port_range(net, &low, &high);
		remaining = (high - low) + 1;

		/* By starting with offset being an even number,
		 * we tend to leave about 50% of ports for other uses,
		 * like bind(0).
		 */
		offset &= ~1;

		local_bh_disable();
		for (i = 0; i < remaining; i++) {
			port = low + (i + offset) % remaining;
			if (inet_is_local_reserved_port(net, port))
				continue;
			head = &hinfo->bhash[inet_bhashfn(net, port,
					hinfo->bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, &head->chain) {
				if (net_eq(ib_net(tb), net) &&
				    tb->port == port) {
					if (tb->fastreuse >= 0 ||
					    tb->fastreuseport >= 0)
						goto next_port;
					WARN_ON(hlist_empty(&tb->owners));
					if (!check_established(death_row, sk,
								port, &tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
					net, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			tb->fastreuseport = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += (i + 2) & ~1;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->inet_sport = htons(port);
			__inet_hash_nolisten(sk, (struct sock *)tw);
		}
		if (tw)
			inet_twsk_bind_unhash(tw, hinfo);
		spin_unlock(&head->lock);

		if (tw) {
			inet_twsk_deschedule(tw);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
	tb  = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);
	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__inet_hash_nolisten(sk, NULL);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = check_established(death_row, sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}

/*
 * Bind a port for a connect operation and hash it.
 */
int inet_hash_connect(struct inet_timewait_death_row *death_row,
		      struct sock *sk)
{
	u32 port_offset = 0;

	if (!inet_sk(sk)->inet_num)
		port_offset = inet_sk_port_offset(sk);
	return __inet_hash_connect(death_row, sk, port_offset,
				   __inet_check_established);
}
EXPORT_SYMBOL_GPL(inet_hash_connect);

void inet_hashinfo_init(struct inet_hashinfo *h)
{
	int i;

	for (i = 0; i < INET_LHTABLE_SIZE; i++) {
		spin_lock_init(&h->listening_hash[i].lock);
		INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
				      i + LISTENING_NULLS_BASE);
	}
}
EXPORT_SYMBOL_GPL(inet_hashinfo_init);

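/* Size and allocate the spinlock array protecting the established hash
 * chains: roughly two cache lines worth of locks per possible CPU,
 * rounded up to a power of two and capped at one lock per bucket.
 */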
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
	unsigned int i, nblocks = 1;

	if (sizeof(spinlock_t) != 0) {
		/* allocate 2 cache lines or at least one spinlock per cpu */
		nblocks = max_t(unsigned int,
				2 * L1_CACHE_BYTES / sizeof(spinlock_t),
				1);
		nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());

		/* no more locks than number of hash buckets */
		nblocks = min(nblocks, hashinfo->ehash_mask + 1);

		hashinfo->ehash_locks = kmalloc_array(nblocks, sizeof(spinlock_t),
						      GFP_KERNEL | __GFP_NOWARN);
		if (!hashinfo->ehash_locks)
			hashinfo->ehash_locks = vmalloc(nblocks * sizeof(spinlock_t));

		if (!hashinfo->ehash_locks)
			return -ENOMEM;

		for (i = 0; i < nblocks; i++)
			spin_lock_init(&hashinfo->ehash_locks[i]);
	}
	hashinfo->ehash_locks_mask = nblocks - 1;
	return 0;
}
EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);