neighbour.c 73.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

J
Joe Perches 已提交
18 19
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

20
#include <linux/slab.h>
L
Linus Torvalds 已提交
21 22 23 24 25 26 27 28 29 30
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
31
#include <net/net_namespace.h>
L
Linus Torvalds 已提交
32 33 34
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
35
#include <net/netevent.h>
36
#include <net/netlink.h>
L
Linus Torvalds 已提交
37 38
#include <linux/rtnetlink.h>
#include <linux/random.h>
39
#include <linux/string.h>
40
#include <linux/log2.h>
L
Linus Torvalds 已提交
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60

/*
 * Compile-time verbosity of the debug output in this file:
 *   0      - NEIGH_PRINTK1 and NEIGH_PRINTK2 both expand to nothing,
 *   1      - NEIGH_PRINTK1 expands to printk(), NEIGH_PRINTK2 to nothing,
 *   >= 2   - both expand to printk().
 */
#define NEIGH_DEBUG 1

/* NEIGH_PRINTK forwards its arguments to printk(); NEIGH_NOPRINTK drops them. */
#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
/* Both levels start out silent and are re-pointed at NEIGH_PRINTK below. */
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

/*
 * Mask applied to the result of pneigh_hash(); the proxy hash buckets are
 * walked with indices 0..PNEIGH_HASHMASK.
 */
#define PNEIGH_HASHMASK		0xF

/* File-scope state declared up front. */
static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif

/* Prototypes of static helpers that are used before they are defined. */
static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
L
Linus Torvalds 已提交
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks, if backend/driver wants to use neighbour
     cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect another entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be made under neigh->lock,
   the most complicated procedure, which we allow is dev->hard_header.
   It is supposed, that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock protecting the
   list of neighbour tables. This list is used only in process context.
 */

static DEFINE_RWLOCK(neigh_tbl_lock);

103
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
104 105 106 107 108
{
	kfree_skb(skb);
	return -ENETDOWN;
}

109 110 111 112 113
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

T
Thomas Graf 已提交
114
	__neigh_notify(neigh, RTM_DELNEIGH, 0);
115 116 117
	neigh_release(neigh);
}

L
Linus Torvalds 已提交
118 119 120 121 122 123 124 125
/*
 * The result is randomly distributed over the interval
 * (1/2)*base...(3/2)*base.
 * This corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	/* A zero base yields zero; this also avoids a modulo by zero. */
	if (!base)
		return 0;

	/* Random offset in [0, base) added to base/2. */
	return (net_random() % base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
L
Linus Torvalds 已提交
129 130 131 132 133 134


static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
135
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
136 137 138 139

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
140 141
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
142
	for (i = 0; i < (1 << nht->hash_shift); i++) {
143 144
		struct neighbour *n;
		struct neighbour __rcu **np;
L
Linus Torvalds 已提交
145

146
		np = &nht->hash_buckets[i];
147 148
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
L
Linus Torvalds 已提交
149 150 151 152 153 154 155
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
156 157 158
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
L
Linus Torvalds 已提交
159 160 161
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
162
				neigh_cleanup_and_release(n);
L
Linus Torvalds 已提交
163 164 165 166 167 168 169 170 171 172 173 174 175 176
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

177 178 179 180 181 182 183 184 185 186
/*
 * Arm n->timer to fire at 'when' and take a reference on behalf of the
 * timer.  mod_timer() returning non-zero means the timer was already
 * pending, which is reported as a bug together with a stack dump.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	int was_pending;

	neigh_hold(n);

	was_pending = mod_timer(&n->timer, when);
	if (likely(!was_pending))
		return;

	printk("NEIGH: BUG, double timer add, state is %x\n",
	       n->nud_state);
	dump_stack();
}

L
Linus Torvalds 已提交
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
/*
 * Cancel n->timer if the entry is in a timer-driven state and the timer
 * was actually pending; in that case the reference taken for the timer
 * is dropped.
 *
 * Returns 1 if a pending timer was cancelled, 0 otherwise.
 */
static int neigh_del_timer(struct neighbour *n)
{
	if (!(n->nud_state & NUD_IN_TIMER))
		return 0;

	if (!del_timer(&n->timer))
		return 0;

	neigh_release(n);
	return 1;
}

/*
 * Empty a proxy queue: for every queued skb, drop the reference on its
 * device and free the skb.
 */
static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	for (skb = skb_dequeue(list); skb != NULL; skb = skb_dequeue(list)) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

207
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
L
Linus Torvalds 已提交
208 209
{
	int i;
210
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
211

212 213 214
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

215
	for (i = 0; i < (1 << nht->hash_shift); i++) {
216 217
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];
L
Linus Torvalds 已提交
218

219 220
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
L
Linus Torvalds 已提交
221 222 223 224
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
225 226 227
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
L
Linus Torvalds 已提交
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				skb_queue_purge(&n->arp_queue);
E
Eric Dumazet 已提交
243
				n->arp_queue_len_bytes = 0;
L
Linus Torvalds 已提交
244 245 246 247 248 249 250 251
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
252
			neigh_cleanup_and_release(n);
L
Linus Torvalds 已提交
253 254
		}
	}
255
}
L
Linus Torvalds 已提交
256

257 258 259 260 261 262
/*
 * Flush the entries of 'dev' from 'tbl' (all entries when dev is NULL, see
 * neigh_flush_dev()).  The flush runs with tbl->lock write-held and bottom
 * halves disabled.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
263
EXPORT_SYMBOL(neigh_changeaddr);
264 265 266 267 268

/*
 * Remove everything 'tbl' holds for 'dev': the regular entries, the proxy
 * entries, and finally the whole proxy queue after its timer has been
 * stopped.
 *
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	/* Regular and proxy entries go in a single tbl->lock section. */
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	/* The proxy timer is stopped before the proxy queue is emptied. */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);

	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
L
Linus Torvalds 已提交
277

278
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
L
Linus Torvalds 已提交
279 280 281 282 283 284 285 286 287 288 289 290 291 292
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

293 294 295 296 297 298 299 300 301
	if (tbl->entry_size)
		n = kzalloc(tbl->entry_size, GFP_ATOMIC);
	else {
		int sz = sizeof(*n) + tbl->key_len;

		sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
		sz += dev->neigh_priv_len;
		n = kzalloc(sz, GFP_ATOMIC);
	}
L
Linus Torvalds 已提交
302 303 304 305 306
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
307
	seqlock_init(&n->ha_lock);
L
Linus Torvalds 已提交
308 309 310
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
311
	seqlock_init(&n->hh.hh_lock);
L
Linus Torvalds 已提交
312
	n->parms	  = neigh_parms_clone(&tbl->parms);
313
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
L
Linus Torvalds 已提交
314 315 316 317 318 319 320 321 322 323 324 325 326

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

327 328 329 330 331 332
/* Store a random 32-bit value with its lowest bit forced to 1 into *x. */
static void neigh_get_hash_rnd(u32 *x)
{
	u32 rnd;

	get_random_bytes(&rnd, sizeof(rnd));
	*x = rnd | 1;
}

333
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
L
Linus Torvalds 已提交
334
{
335
	size_t size = (1 << shift) * sizeof(struct neighbour *);
336
	struct neigh_hash_table *ret;
E
Eric Dumazet 已提交
337
	struct neighbour __rcu **buckets;
338
	int i;
L
Linus Torvalds 已提交
339

340 341 342 343 344 345
	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
E
Eric Dumazet 已提交
346
		buckets = (struct neighbour __rcu **)
347 348 349 350 351
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
L
Linus Torvalds 已提交
352
	}
E
Eric Dumazet 已提交
353
	ret->hash_buckets = buckets;
354
	ret->hash_shift = shift;
355 356
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
L
Linus Torvalds 已提交
357 358 359
	return ret;
}

360
static void neigh_hash_free_rcu(struct rcu_head *head)
L
Linus Torvalds 已提交
361
{
362 363 364
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
365
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
E
Eric Dumazet 已提交
366
	struct neighbour __rcu **buckets = nht->hash_buckets;
L
Linus Torvalds 已提交
367 368

	if (size <= PAGE_SIZE)
369
		kfree(buckets);
L
Linus Torvalds 已提交
370
	else
371 372
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
L
Linus Torvalds 已提交
373 374
}

375
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
376
						unsigned long new_shift)
L
Linus Torvalds 已提交
377
{
378 379
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;
L
Linus Torvalds 已提交
380 381 382

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

383 384
	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
385
	new_nht = neigh_hash_alloc(new_shift);
386 387
	if (!new_nht)
		return old_nht;
L
Linus Torvalds 已提交
388

389
	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
L
Linus Torvalds 已提交
390 391
		struct neighbour *n, *next;

392 393
		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
394 395 396 397
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);
L
Linus Torvalds 已提交
398

399
			hash >>= (32 - new_nht->hash_shift);
400 401 402 403 404 405 406 407
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
L
Linus Torvalds 已提交
408 409 410
		}
	}

411 412 413
	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
L
Linus Torvalds 已提交
414 415 416 417 418 419 420
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
421
	u32 hash_val;
422
	struct neigh_hash_table *nht;
423

L
Linus Torvalds 已提交
424 425
	NEIGH_CACHE_STAT_INC(tbl, lookups);

426 427
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
428
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
429 430 431 432

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
L
Linus Torvalds 已提交
433
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
434 435
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
L
Linus Torvalds 已提交
436 437 438 439
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}
440

441
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
442 443
	return n;
}
444
EXPORT_SYMBOL(neigh_lookup);
L
Linus Torvalds 已提交
445

446 447
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
L
Linus Torvalds 已提交
448 449 450
{
	struct neighbour *n;
	int key_len = tbl->key_len;
451
	u32 hash_val;
452
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
453 454 455

	NEIGH_CACHE_STAT_INC(tbl, lookups);

456 457
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
458
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
459 460 461 462

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
463
		if (!memcmp(n->primary_key, pkey, key_len) &&
464
		    net_eq(dev_net(n->dev), net)) {
465 466
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
L
Linus Torvalds 已提交
467 468 469 470
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}
471

472
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
473 474
	return n;
}
475
EXPORT_SYMBOL(neigh_lookup_nodev);
L
Linus Torvalds 已提交
476

477 478
/*
 * Create the entry for (pkey, dev) in 'tbl', or return the one that is
 * already there.
 *
 * A new entry is allocated and run through the protocol constructor, the
 * device's ndo_neigh_construct hook and the parms neigh_setup hook (each
 * optional) before tbl->lock is taken.  Under the lock the hash table is
 * grown if the entry count exceeds the bucket count, and the bucket is
 * searched for an existing match; if one is found the new entry is released
 * and the existing one returned.
 *
 * @want_ref: when true, the returned entry carries an extra reference for
 *            the caller.
 *
 * Returns the entry, or ERR_PTR(): -ENOBUFS if allocation failed, the
 * negative value returned by a setup hook, or -EINVAL if the entry's parms
 * are marked dead.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	struct neigh_hash_table *nht;
	struct neighbour *n, *cur, *rc;
	int key_len = tbl->key_len;
	u32 hash_val;
	int error;

	n = neigh_alloc(tbl, dev);
	if (!n)
		return ERR_PTR(-ENOBUFS);

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor) {
		error = tbl->constructor(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto release;
		}
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup) {
		error = n->parms->neigh_setup(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto release;
		}
	}

	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* Double the bucket count once there are more entries than buckets. */
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto unlock;
	}

	/* Somebody may have inserted the same key in the meantime. */
	cur = rcu_dereference_protected(nht->hash_buckets[hash_val],
					lockdep_is_held(&tbl->lock));
	while (cur != NULL) {
		if (dev == cur->dev && !memcmp(cur->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(cur);
			rc = cur;
			goto unlock;
		}
		cur = rcu_dereference_protected(cur->next,
						lockdep_is_held(&tbl->lock));
	}

	/* No duplicate: publish the new entry at the head of the chain. */
	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	return n;

unlock:
	write_unlock_bh(&tbl->lock);
release:
	neigh_release(n);
	return rc;
}
EXPORT_SYMBOL(__neigh_create);
L
Linus Torvalds 已提交
564

565
static u32 pneigh_hash(const void *pkey, int key_len)
566 567 568 569 570 571
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
572 573
	return hash_val;
}
574

575 576 577 578 579 580 581
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
582
		if (!memcmp(n->key, pkey, key_len) &&
583
		    net_eq(pneigh_net(n), net) &&
584
		    (n->dev == dev || !n->dev))
585 586
			return n;
		n = n->next;
587
	}
588 589
	return NULL;
}
590

591 592 593 594 595 596 597 598
/*
 * Look up a proxy entry.  This variant takes no lock; compare
 * pneigh_lookup(), which wraps the same search in tbl->lock.
 * Returns the matching entry or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	struct pneigh_entry *chain;

	chain = tbl->phash_buckets[pneigh_hash(pkey, key_len)];

	return __pneigh_lookup_1(chain, net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
601

602 603
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
L
Linus Torvalds 已提交
604 605 606 607
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
608
	u32 hash_val = pneigh_hash(pkey, key_len);
L
Linus Torvalds 已提交
609 610

	read_lock_bh(&tbl->lock);
611 612
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
L
Linus Torvalds 已提交
613
	read_unlock_bh(&tbl->lock);
614 615

	if (n || !creat)
L
Linus Torvalds 已提交
616 617
		goto out;

618 619
	ASSERT_RTNL();

L
Linus Torvalds 已提交
620 621 622 623
	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

E
Eric Dumazet 已提交
624
	write_pnet(&n->net, hold_net(net));
L
Linus Torvalds 已提交
625 626 627 628 629 630 631 632
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
633
		release_net(net);
L
Linus Torvalds 已提交
634 635 636 637 638 639 640 641 642 643 644 645
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
646
EXPORT_SYMBOL(pneigh_lookup);
L
Linus Torvalds 已提交
647 648


649
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
L
Linus Torvalds 已提交
650 651 652 653
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
654
	u32 hash_val = pneigh_hash(pkey, key_len);
L
Linus Torvalds 已提交
655 656 657 658

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
659
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
660
		    net_eq(pneigh_net(n), net)) {
L
Linus Torvalds 已提交
661 662 663 664 665 666
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
667
			release_net(pneigh_net(n));
L
Linus Torvalds 已提交
668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

/*
 * Drop every proxy entry bound to 'dev' (every proxy entry when dev is
 * NULL).  Takes no lock itself; neigh_ifdown() calls it with tbl->lock
 * write-held.
 *
 * Always returns -ENOENT, whether or not anything was removed.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **link;
	u32 bucket;

	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		link = &tbl->phash_buckets[bucket];

		while ((n = *link) != NULL) {
			if (dev && n->dev != dev) {
				/* Not ours: keep it and move on. */
				link = &n->next;
				continue;
			}

			*link = n->next;
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
		}
	}

	return -ENOENT;
}

700 701 702 703 704 705 706
static void neigh_parms_destroy(struct neigh_parms *parms);

/* Drop one reference on 'parms'; the last reference destroys it. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (!atomic_dec_and_test(&parms->refcnt))
		return;

	neigh_parms_destroy(parms);
}
L
Linus Torvalds 已提交
707 708 709 710 711 712 713

/*
 *	neighbour must already be out of the table;
 *
 */
void neigh_destroy(struct neighbour *neigh)
{
714 715
	struct net_device *dev = neigh->dev;

L
Linus Torvalds 已提交
716 717 718
	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
J
Joe Perches 已提交
719
		pr_warn("Destroying alive neighbour %p\n", neigh);
L
Linus Torvalds 已提交
720 721 722 723 724
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
J
Joe Perches 已提交
725
		pr_warn("Impossible event\n");
L
Linus Torvalds 已提交
726 727

	skb_queue_purge(&neigh->arp_queue);
E
Eric Dumazet 已提交
728
	neigh->arp_queue_len_bytes = 0;
L
Linus Torvalds 已提交
729

730 731 732
	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

733
	dev_put(dev);
L
Linus Torvalds 已提交
734 735 736 737 738
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
739
	kfree_rcu(neigh, rcu);
L
Linus Torvalds 已提交
740
}
741
EXPORT_SYMBOL(neigh_destroy);
L
Linus Torvalds 已提交
742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766

/*
 * The neighbour's state is suspicious: disable the fast path by routing
 * output through neigh->ops->output.
 *
 * Must be called with neigh->lock held for writing.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	neigh->output = neigh->ops->output;
}

/*
 * The neighbour's state is OK: enable the fast path by routing output
 * through neigh->ops->connected_output.
 *
 * Must be called with neigh->lock held for writing.
 */
static void neigh_connect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	neigh->output = neigh->ops->connected_output;
}

767
static void neigh_periodic_work(struct work_struct *work)
L
Linus Torvalds 已提交
768
{
769
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
770 771
	struct neighbour *n;
	struct neighbour __rcu **np;
772
	unsigned int i;
773
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
774 775 776

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

777
	write_lock_bh(&tbl->lock);
778 779
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
L
Linus Torvalds 已提交
780

781 782 783
	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

L
Linus Torvalds 已提交
784 785 786 787
	/*
	 *	periodically recompute ReachableTime from random function
	 */

788
	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
L
Linus Torvalds 已提交
789
		struct neigh_parms *p;
790
		tbl->last_rand = jiffies;
L
Linus Torvalds 已提交
791 792 793 794 795
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

796
	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
797
		np = &nht->hash_buckets[i];
L
Linus Torvalds 已提交
798

799 800
		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
801
			unsigned int state;
L
Linus Torvalds 已提交
802

803
			write_lock(&n->lock);
L
Linus Torvalds 已提交
804

805 806 807 808 809
			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}
L
Linus Torvalds 已提交
810

811 812
			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;
L
Linus Torvalds 已提交
813

814 815 816 817 818 819 820 821 822
			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
L
Linus Torvalds 已提交
823 824 825
			write_unlock(&n->lock);

next_elt:
826 827 828 829 830 831 832 833 834
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
835 836
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
L
Linus Torvalds 已提交
837
	}
838
out:
839 840 841
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
L
Linus Torvalds 已提交
842
	 */
843 844 845
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
L
Linus Torvalds 已提交
846 847 848 849 850
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
E
Eric Dumazet 已提交
851
	return (n->nud_state & NUD_PROBE) ?
L
Linus Torvalds 已提交
852
		p->ucast_probes :
E
Eric Dumazet 已提交
853
		p->ucast_probes + p->app_probes + p->mcast_probes;
L
Linus Torvalds 已提交
854 855
}

856
static void neigh_invalidate(struct neighbour *neigh)
E
Eric Dumazet 已提交
857 858
	__releases(neigh->lock)
	__acquires(neigh->lock)
859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	skb_queue_purge(&neigh->arp_queue);
E
Eric Dumazet 已提交
878
	neigh->arp_queue_len_bytes = 0;
879 880
}

E
Eric Dumazet 已提交
881 882 883 884 885 886 887 888 889 890 891 892 893
/*
 * Send one solicitation for 'neigh'.  Called with neigh->lock write-held;
 * the lock is released before the solicit callback runs and is NOT taken
 * again.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *head = skb_peek(&neigh->arp_queue);
	struct sk_buff *skb = NULL;

	/* keep skb alive even if arp_queue overflows */
	if (head)
		skb = skb_copy(head, GFP_ATOMIC);

	write_unlock(&neigh->lock);

	/* skb is NULL when the queue was empty or the copy failed. */
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}

L
Linus Torvalds 已提交
894 895 896 897 898 899
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
900
	unsigned int state;
L
Linus Torvalds 已提交
901 902 903 904 905 906 907 908
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

909
	if (!(state & NUD_IN_TIMER))
L
Linus Torvalds 已提交
910 911 912
		goto out;

	if (state & NUD_REACHABLE) {
913
		if (time_before_eq(now,
L
Linus Torvalds 已提交
914 915 916 917 918 919 920
				   neigh->confirmed + neigh->parms->reachable_time)) {
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
921
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
922 923 924 925 926
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
927
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
928
			neigh_suspect(neigh);
929
			notify = 1;
L
Linus Torvalds 已提交
930 931
		}
	} else if (state & NUD_DELAY) {
932
		if (time_before_eq(now,
L
Linus Torvalds 已提交
933 934 935
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
936
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
937
			neigh_connect(neigh);
938
			notify = 1;
L
Linus Torvalds 已提交
939 940 941 942
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
943
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
944 945 946 947 948 949 950 951 952 953 954 955
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
956
		neigh_invalidate(neigh);
L
Linus Torvalds 已提交
957 958 959 960 961
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
962 963
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
L
Linus Torvalds 已提交
964 965
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
E
Eric Dumazet 已提交
966
		neigh_probe(neigh);
967
	} else {
968
out:
969 970
		write_unlock(&neigh->lock);
	}
T
Thomas Graf 已提交
971

972
	if (notify)
T
Thomas Graf 已提交
973
		neigh_update_notify(neigh);
L
Linus Torvalds 已提交
974 975 976 977 978 979 980

	neigh_release(neigh);
}

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
E
Eric Dumazet 已提交
981
	bool immediate_probe = false;
L
Linus Torvalds 已提交
982 983 984 985 986 987 988 989 990

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
E
Eric Dumazet 已提交
991 992
			unsigned long next, now = jiffies;

L
Linus Torvalds 已提交
993 994
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
E
Eric Dumazet 已提交
995 996 997 998
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
L
Linus Torvalds 已提交
999 1000
		} else {
			neigh->nud_state = NUD_FAILED;
1001
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
1002 1003
			write_unlock_bh(&neigh->lock);

1004
			kfree_skb(skb);
L
Linus Torvalds 已提交
1005 1006 1007 1008 1009
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
1010
		neigh->updated = jiffies;
1011 1012
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
L
Linus Torvalds 已提交
1013 1014 1015 1016
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
E
Eric Dumazet 已提交
1017 1018
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
L
Linus Torvalds 已提交
1019
				struct sk_buff *buff;
E
Eric Dumazet 已提交
1020

1021
				buff = __skb_dequeue(&neigh->arp_queue);
E
Eric Dumazet 已提交
1022 1023 1024
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
L
Linus Torvalds 已提交
1025
				kfree_skb(buff);
1026
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
L
Linus Torvalds 已提交
1027
			}
E
Eric Dumazet 已提交
1028
			skb_dst_force(skb);
L
Linus Torvalds 已提交
1029
			__skb_queue_tail(&neigh->arp_queue, skb);
E
Eric Dumazet 已提交
1030
			neigh->arp_queue_len_bytes += skb->truesize;
L
Linus Torvalds 已提交
1031 1032 1033 1034
		}
		rc = 1;
	}
out_unlock_bh:
E
Eric Dumazet 已提交
1035 1036 1037 1038 1039
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
L
Linus Torvalds 已提交
1040 1041
	return rc;
}
1042
EXPORT_SYMBOL(__neigh_event_send);
L
Linus Torvalds 已提交
1043

1044
static void neigh_update_hhs(struct neighbour *neigh)
L
Linus Torvalds 已提交
1045 1046
{
	struct hh_cache *hh;
1047
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
D
Doug Kehn 已提交
1048 1049 1050 1051
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;
L
Linus Torvalds 已提交
1052 1053

	if (update) {
1054 1055
		hh = &neigh->hh;
		if (hh->hh_len) {
1056
			write_seqlock_bh(&hh->hh_lock);
L
Linus Torvalds 已提交
1057
			update(hh, neigh->dev, neigh->ha);
1058
			write_sequnlock_bh(&hh->hh_lock);
L
Linus Torvalds 已提交
1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071
		}
	}
}



/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows to retain current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold reference count on the entry.

   Returns 0 on success (including the case where a differing lladdr is
   ignored because no override flag allows it), -EPERM when a
   non-administrative update targets a NUD_NOARP or NUD_PERMANENT entry,
   and -EINVAL when no lladdr is supplied, the device uses link-layer
   addresses and the entry holds no valid address yet.

   neigh->lock is taken for writing with BHs disabled for the whole
   update; it is dropped temporarily around each queued packet that is
   re-sent once the entry becomes valid.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* From here on "lladdr == neigh->ha" means "address unchanged". */

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else {
				goto out;
			}
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* ha_lock lets lockless readers detect the address change */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		/* whatever could not be sent is dropped */
		skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
L
Linus Torvalds 已提交
1246 1247 1248 1249 1250 1251 1252 1253

/*
 * Look up the entry for @saddr on @dev in @tbl and, if one is returned,
 * update it to NUD_STALE with @lladdr using NEIGH_UPDATE_F_OVERRIDE.
 *
 * The last argument passed to __neigh_lookup() is true when an lladdr is
 * supplied or the device has no link-layer addresses.
 * NOTE(review): that argument presumably requests creation of a missing
 * entry -- confirm against __neigh_lookup().
 *
 * Returns whatever __neigh_lookup() returned (possibly NULL); the result
 * of neigh_update() is not reported.
 */
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
L
Linus Torvalds 已提交
1259

E
Eric Dumazet 已提交
1260
/* called with read_lock_bh(&n->lock); */
1261
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
L
Linus Torvalds 已提交
1262 1263
{
	struct net_device *dev = dst->dev;
1264 1265
	__be16 prot = dst->ops->protocol;
	struct hh_cache	*hh = &n->hh;
1266 1267

	write_lock_bh(&n->lock);
E
Eric Dumazet 已提交
1268

1269 1270 1271
	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
1272 1273
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);
E
Eric Dumazet 已提交
1274

1275
	write_unlock_bh(&n->lock);
L
Linus Torvalds 已提交
1276 1277 1278
}

/* This function can be used in contexts, where only old dev_queue_xmit
1279 1280
 * worked, f.e. if you want to override normal output path (eql, shaper),
 * but resolution is not made yet.
L
Linus Torvalds 已提交
1281 1282
 */

1283
int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
1284 1285 1286
{
	struct net_device *dev = skb->dev;

1287
	__skb_pull(skb, skb_network_offset(skb));
L
Linus Torvalds 已提交
1288

1289 1290
	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
			    skb->len) < 0 &&
1291
	    dev->header_ops->rebuild(skb))
L
Linus Torvalds 已提交
1292 1293 1294 1295
		return 0;

	return dev_queue_xmit(skb);
}
1296
EXPORT_SYMBOL(neigh_compat_output);
L
Linus Torvalds 已提交
1297 1298 1299

/* Slow and careful. */

1300
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
1301
{
E
Eric Dumazet 已提交
1302
	struct dst_entry *dst = skb_dst(skb);
L
Linus Torvalds 已提交
1303 1304
	int rc = 0;

1305
	if (!dst)
L
Linus Torvalds 已提交
1306 1307 1308 1309 1310
		goto discard;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
1311
		unsigned int seq;
E
Eric Dumazet 已提交
1312

1313 1314
		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);
E
Eric Dumazet 已提交
1315

1316
		do {
1317
			__skb_pull(skb, skb_network_offset(skb));
1318 1319 1320 1321
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));
E
Eric Dumazet 已提交
1322

L
Linus Torvalds 已提交
1323
		if (err >= 0)
1324
			rc = dev_queue_xmit(skb);
L
Linus Torvalds 已提交
1325 1326 1327 1328 1329 1330 1331
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1332
		      dst, neigh);
L
Linus Torvalds 已提交
1333 1334 1335 1336 1337
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
1338
EXPORT_SYMBOL(neigh_resolve_output);
L
Linus Torvalds 已提交
1339 1340 1341

/* As fast as possible without hh cache */

1342
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
1343 1344
{
	struct net_device *dev = neigh->dev;
1345
	unsigned int seq;
1346
	int err;
L
Linus Torvalds 已提交
1347

1348
	do {
1349
		__skb_pull(skb, skb_network_offset(skb));
1350 1351 1352 1353 1354
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

L
Linus Torvalds 已提交
1355
	if (err >= 0)
1356
		err = dev_queue_xmit(skb);
L
Linus Torvalds 已提交
1357 1358 1359 1360 1361 1362
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
1363
EXPORT_SYMBOL(neigh_connected_output);
L
Linus Torvalds 已提交
1364

1365 1366 1367 1368 1369 1370
/*
 * Output method that performs no resolution and builds no header:
 * @neigh is unused and @skb is handed straight to dev_queue_xmit(),
 * whose result is returned.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;

	rc = dev_queue_xmit(skb);
	return rc;
}
EXPORT_SYMBOL(neigh_direct_output);

L
Linus Torvalds 已提交
1371 1372 1373 1374 1375
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
1376
	struct sk_buff *skb, *n;
L
Linus Torvalds 已提交
1377 1378 1379

	spin_lock(&tbl->proxy_queue.lock);

1380 1381
	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;
L
Linus Torvalds 已提交
1382 1383

		if (tdif <= 0) {
1384
			struct net_device *dev = skb->dev;
1385

1386
			__skb_unlink(skb, &tbl->proxy_queue);
1387 1388
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
1389
				tbl->proxy_redo(skb);
1390 1391
				rcu_read_unlock();
			} else {
1392
				kfree_skb(skb);
1393
			}
L
Linus Torvalds 已提交
1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}
1415 1416 1417

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
L
Linus Torvalds 已提交
1418 1419 1420 1421 1422 1423

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
E
Eric Dumazet 已提交
1424
	skb_dst_drop(skb);
L
Linus Torvalds 已提交
1425 1426 1427 1428 1429
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1430
EXPORT_SYMBOL(pneigh_enqueue);
L
Linus Torvalds 已提交
1431

1432
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1433 1434 1435 1436 1437
						      struct net *net, int ifindex)
{
	struct neigh_parms *p;

	for (p = &tbl->parms; p; p = p->next) {
1438
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1439 1440 1441 1442 1443 1444
		    (!p->dev && !ifindex))
			return p;
	}

	return NULL;
}
L
Linus Torvalds 已提交
1445 1446 1447 1448

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
1449
	struct neigh_parms *p, *ref;
1450 1451
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;
1452

1453
	ref = lookup_neigh_parms(tbl, net, 0);
1454 1455
	if (!ref)
		return NULL;
L
Linus Torvalds 已提交
1456

1457
	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
L
Linus Torvalds 已提交
1458 1459 1460 1461 1462
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
1463

1464
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1465 1466
			kfree(p);
			return NULL;
L
Linus Torvalds 已提交
1467
		}
1468 1469 1470

		dev_hold(dev);
		p->dev = dev;
E
Eric Dumazet 已提交
1471
		write_pnet(&p->net, hold_net(net));
L
Linus Torvalds 已提交
1472 1473 1474 1475 1476 1477 1478 1479
		p->sysctl_table = NULL;
		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
1480
EXPORT_SYMBOL(neigh_parms_alloc);
L
Linus Torvalds 已提交
1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501

/*
 * RCU callback: recover the neigh_parms that embeds @head and drop a
 * reference on it via neigh_parms_put().
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	neigh_parms_put(container_of(head, struct neigh_parms, rcu_head));
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
1502 1503
			if (parms->dev)
				dev_put(parms->dev);
L
Linus Torvalds 已提交
1504 1505 1506 1507 1508 1509 1510
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
1511
EXPORT_SYMBOL(neigh_parms_release);
L
Linus Torvalds 已提交
1512

1513
/*
 * Final teardown of a parms block: drop its network-namespace reference
 * and free the memory.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}

1519 1520
static struct lock_class_key neigh_table_proxy_queue_class;

1521
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
L
Linus Torvalds 已提交
1522 1523 1524 1525
{
	unsigned long now = jiffies;
	unsigned long phsize;

E
Eric Dumazet 已提交
1526
	write_pnet(&tbl->parms.net, &init_net);
L
Linus Torvalds 已提交
1527 1528 1529 1530 1531 1532 1533
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");
1534

L
Linus Torvalds 已提交
1535
#ifdef CONFIG_PROC_FS
1536 1537
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
L
Linus Torvalds 已提交
1538 1539 1540
		panic("cannot create neighbour proc dir entry");
#endif

1541
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
L
Linus Torvalds 已提交
1542 1543

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
A
Andrew Morton 已提交
1544
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
L
Linus Torvalds 已提交
1545

1546
	if (!tbl->nht || !tbl->phash_buckets)
L
Linus Torvalds 已提交
1547 1548 1549
		panic("cannot allocate neighbour cache hashes");

	rwlock_init(&tbl->lock);
1550
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1551
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1552
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1553 1554
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);
L
Linus Torvalds 已提交
1555 1556 1557

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1558 1559 1560 1561 1562 1563 1564
}

void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
L
Linus Torvalds 已提交
1565
	write_lock(&neigh_tbl_lock);
1566 1567 1568 1569
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
L
Linus Torvalds 已提交
1570 1571 1572
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);
1573 1574

	if (unlikely(tmp)) {
J
Joe Perches 已提交
1575 1576
		pr_err("Registering multiple tables for family %d\n",
		       tbl->family);
1577 1578
		dump_stack();
	}
L
Linus Torvalds 已提交
1579
}
1580
EXPORT_SYMBOL(neigh_table_init);
L
Linus Torvalds 已提交
1581 1582 1583 1584 1585 1586

int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
1587
	cancel_delayed_work_sync(&tbl->gc_work);
L
Linus Torvalds 已提交
1588 1589 1590 1591
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
J
Joe Perches 已提交
1592
		pr_crit("neighbour leakage\n");
L
Linus Torvalds 已提交
1593 1594 1595 1596 1597 1598 1599 1600 1601
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

E
Eric Dumazet 已提交
1602 1603
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
1604
	tbl->nht = NULL;
L
Linus Torvalds 已提交
1605 1606 1607 1608

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

1609 1610
	remove_proc_entry(tbl->id, init_net.proc_net_stat);

1611 1612 1613
	free_percpu(tbl->stats);
	tbl->stats = NULL;

L
Linus Torvalds 已提交
1614 1615
	return 0;
}
1616
EXPORT_SYMBOL(neigh_table_clear);
L
Linus Torvalds 已提交
1617

1618
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
L
Linus Torvalds 已提交
1619
{
1620
	struct net *net = sock_net(skb->sk);
1621 1622
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
L
Linus Torvalds 已提交
1623 1624
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
1625
	int err = -EINVAL;
L
Linus Torvalds 已提交
1626

1627
	ASSERT_RTNL();
1628
	if (nlmsg_len(nlh) < sizeof(*ndm))
L
Linus Torvalds 已提交
1629 1630
		goto out;

1631 1632 1633 1634 1635 1636
	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
1637
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1638 1639 1640 1641 1642 1643
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

L
Linus Torvalds 已提交
1644 1645
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1646
		struct neighbour *neigh;
L
Linus Torvalds 已提交
1647 1648 1649 1650 1651

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

1652
		if (nla_len(dst_attr) < tbl->key_len)
1653
			goto out;
L
Linus Torvalds 已提交
1654 1655

		if (ndm->ndm_flags & NTF_PROXY) {
1656
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1657
			goto out;
L
Linus Torvalds 已提交
1658 1659
		}

1660
		if (dev == NULL)
1661
			goto out;
L
Linus Torvalds 已提交
1662

1663 1664 1665
		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
1666
			goto out;
L
Linus Torvalds 已提交
1667
		}
1668 1669 1670 1671 1672

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
1673
		goto out;
L
Linus Torvalds 已提交
1674 1675
	}
	read_unlock(&neigh_tbl_lock);
1676 1677
	err = -EAFNOSUPPORT;

L
Linus Torvalds 已提交
1678 1679 1680 1681
out:
	return err;
}

1682
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
L
Linus Torvalds 已提交
1683
{
1684
	struct net *net = sock_net(skb->sk);
1685 1686
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
L
Linus Torvalds 已提交
1687 1688
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
1689
	int err;
L
Linus Torvalds 已提交
1690

1691
	ASSERT_RTNL();
1692 1693
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
L
Linus Torvalds 已提交
1694 1695
		goto out;

1696 1697 1698 1699 1700 1701
	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
1702
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1703 1704 1705 1706 1707 1708
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1709
			goto out;
1710 1711
	}

L
Linus Torvalds 已提交
1712 1713
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1714 1715 1716
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;
L
Linus Torvalds 已提交
1717 1718 1719 1720 1721

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

1722
		if (nla_len(tb[NDA_DST]) < tbl->key_len)
1723
			goto out;
1724 1725
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
L
Linus Torvalds 已提交
1726 1727

		if (ndm->ndm_flags & NTF_PROXY) {
1728 1729 1730
			struct pneigh_entry *pn;

			err = -ENOBUFS;
1731
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
1732 1733 1734 1735
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
1736
			goto out;
L
Linus Torvalds 已提交
1737 1738
		}

1739
		if (dev == NULL)
1740
			goto out;
1741 1742 1743 1744 1745

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
1746
				goto out;
1747
			}
1748

1749 1750 1751
			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
1752
				goto out;
L
Linus Torvalds 已提交
1753 1754
			}
		} else {
1755 1756 1757
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
1758
				goto out;
L
Linus Torvalds 已提交
1759 1760
			}

1761 1762 1763
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}
L
Linus Torvalds 已提交
1764

1765 1766 1767 1768 1769
		if (ndm->ndm_flags & NTF_USE) {
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1770
		neigh_release(neigh);
1771
		goto out;
L
Linus Torvalds 已提交
1772 1773 1774
	}

	read_unlock(&neigh_tbl_lock);
1775
	err = -EAFNOSUPPORT;
L
Linus Torvalds 已提交
1776 1777 1778 1779
out:
	return err;
}

1780 1781
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
1782 1783 1784 1785 1786
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;
1787

1788 1789 1790 1791 1792 1793
	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
1794
			parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  parms->base_reachable_time) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  parms->delay_probe_time) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
		goto nla_put_failure;
1810
	return nla_nest_end(skb, nest);
1811

1812
nla_put_failure:
1813 1814
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
1815 1816
}

1817 1818
/*
 * Build one netlink message describing @tbl as a whole: name, GC interval
 * and thresholds, an NDTA_CONFIG snapshot, NDTA_STATS summed over all
 * possible CPUs, and the table's default parms.
 *
 * tbl->lock is held for reading (BHs disabled) while the attributes are
 * gathered.  The default parms must not be bound to a device (BUG_ON).
 *
 * Returns the result of nlmsg_end() on success or -EMSGSIZE when the
 * message does not fit, in which case the partial message is cancelled.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* the hash table pointer is RCU-managed; sample it briefly */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

1903 1904
/*
 * Build one netlink message describing a single parms block of @tbl: the
 * table name followed by the NDTA_PARMS nest for @parms.
 *
 * tbl->lock is held for reading (BHs disabled) while the attributes are
 * written.
 *
 * Returns the result of nlmsg_end() on success or -EMSGSIZE when the
 * message does not fit, in which case the partial message is cancelled.
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1934

1935
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1936 1937 1938 1939 1940 1941 1942 1943
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

1944
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};

1960
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1961
{
1962
	struct net *net = sock_net(skb->sk);
1963
	struct neigh_table *tbl;
1964 1965 1966
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;
1967

1968 1969 1970 1971
	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;
1972

1973 1974 1975 1976 1977 1978
	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
1979 1980 1981 1982 1983
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

1984
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1985 1986 1987 1988 1989
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
1990
		goto errout_locked;
1991 1992
	}

1993
	/*
1994 1995 1996 1997 1998
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

1999 2000
	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
2001
		struct neigh_parms *p;
2002
		int i, ifindex = 0;
2003

2004 2005 2006 2007
		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;
2008

2009 2010
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2011

2012
		p = lookup_neigh_parms(tbl, net, ifindex);
2013 2014
		if (p == NULL) {
			err = -ENOENT;
2015
			goto errout_tbl_lock;
2016 2017
		}

2018 2019 2020
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;
2021

2022 2023
			switch (i) {
			case NDTPA_QUEUE_LEN:
E
Eric Dumazet 已提交
2024 2025 2026 2027 2028
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}
2066

2067 2068
	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2069

2070 2071
	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2072

2073 2074
	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2075

2076 2077
	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2078 2079 2080

	err = 0;

2081
errout_tbl_lock:
2082
	write_unlock_bh(&tbl->lock);
2083
errout_locked:
2084
	read_unlock(&neigh_tbl_lock);
2085
errout:
2086 2087 2088
	return err;
}

2089
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2090
{
2091
	struct net *net = sock_net(skb->sk);
2092 2093 2094
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
2095 2096
	struct neigh_table *tbl;

2097
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2098 2099

	read_lock(&neigh_tbl_lock);
2100
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2101 2102
		struct neigh_parms *p;

2103
		if (tidx < tbl_skip || (family && tbl->family != family))
2104 2105
			continue;

2106
		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2107 2108
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
2109 2110
			break;

2111
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2112
			if (!net_eq(neigh_parms_net(p), net))
2113 2114
				continue;

2115 2116
			if (nidx < neigh_skip)
				goto next;
2117

2118
			if (neightbl_fill_param_info(skb, tbl, p,
2119
						     NETLINK_CB(cb->skb).portid,
2120 2121 2122
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
2123
				goto out;
2124 2125
		next:
			nidx++;
2126 2127
		}

2128
		neigh_skip = 0;
2129 2130 2131
	}
out:
	read_unlock(&neigh_tbl_lock);
2132 2133
	cb->args[0] = tidx;
	cb->args[1] = nidx;
2134 2135 2136

	return skb->len;
}
L
Linus Torvalds 已提交
2137

2138 2139
/*
 * Build one netlink message describing @neigh.
 *
 * The ndmsg header is followed by NDA_DST (the primary key), NDA_LLADDR
 * (only while the entry is in a NUD_VALID state), NDA_PROBES and
 * NDA_CACHEINFO.  State, link-layer address and cache timestamps are
 * sampled under neigh->lock so they are mutually consistent.
 *
 * Returns the value of nlmsg_end() on success, or -EMSGSIZE when the
 * message does not fit; a partial message is cancelled first.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* Reported reference count is one less than the raw counter. */
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209
/*
 * Build one netlink message describing the proxy entry @pn of @tbl.
 *
 * The ndmsg header reports NTF_PROXY in ndm_flags and NUD_NONE as state,
 * followed by a single NDA_DST attribute holding the proxied key.
 *
 * Returns the value of nlmsg_end() on success, or -EMSGSIZE when the
 * message does not fit; a partial message is cancelled first.
 */
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
	/*
	 * ndm_type carries an RTN_* address type; NDA_DST is a netlink
	 * attribute id that merely shares the same numeric value.
	 */
	ndm->ndm_type	 = RTN_UNICAST;
	/*
	 * NOTE(review): proxy entries are presumably allowed to exist
	 * without a device (confirm against pneigh_lookup()); report
	 * ifindex 0 for those instead of dereferencing NULL.
	 */
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

T
Thomas Graf 已提交
2220 2221 2222 2223 2224
/*
 * Announce a change of @neigh: first through the netevent notifier chain
 * (NETEVENT_NEIGH_UPDATE), then to netlink listeners as RTM_NEWNEIGH.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
L
Linus Torvalds 已提交
2225 2226 2227 2228

static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
2229
	struct net *net = sock_net(skb->sk);
L
Linus Torvalds 已提交
2230 2231 2232
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
2233
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
2234

2235 2236 2237
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

2238
	for (h = s_h; h < (1 << nht->hash_shift); h++) {
L
Linus Torvalds 已提交
2239 2240
		if (h > s_h)
			s_idx = 0;
2241 2242 2243
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
O
Octavian Purdila 已提交
2244
			if (!net_eq(dev_net(n->dev), net))
2245
				continue;
2246 2247
			if (idx < s_idx)
				goto next;
2248
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
L
Linus Torvalds 已提交
2249
					    cb->nlh->nlmsg_seq,
2250 2251
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
L
Linus Torvalds 已提交
2252 2253 2254
				rc = -1;
				goto out;
			}
2255
next:
2256
			idx++;
L
Linus Torvalds 已提交
2257 2258 2259 2260
		}
	}
	rc = skb->len;
out:
2261
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
2262 2263 2264 2265 2266
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}

2267 2268 2269 2270 2271 2272 2273 2274 2275 2276
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

2277
	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2278 2279 2280 2281 2282 2283 2284
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (dev_net(n->dev) != net)
				continue;
			if (idx < s_idx)
				goto next;
2285
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI, tbl) <= 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}

2307
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
L
Linus Torvalds 已提交
2308 2309 2310
{
	struct neigh_table *tbl;
	int t, family, s_t;
2311
	int proxy = 0;
2312
	int err;
L
Linus Torvalds 已提交
2313 2314

	read_lock(&neigh_tbl_lock);
2315
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2316 2317 2318 2319 2320 2321 2322 2323

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

L
Linus Torvalds 已提交
2324 2325
	s_t = cb->args[0];

2326
	for (tbl = neigh_tables, t = 0; tbl;
2327
	     tbl = tbl->next, t++) {
L
Linus Torvalds 已提交
2328 2329 2330 2331 2332
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
2333 2334 2335 2336
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
2337 2338
		if (err < 0)
			break;
L
Linus Torvalds 已提交
2339 2340 2341 2342 2343 2344 2345 2346 2347 2348
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}

void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
2349
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
2350

2351 2352 2353
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

2354
	read_lock(&tbl->lock); /* avoid resizes */
2355
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
L
Linus Torvalds 已提交
2356 2357
		struct neighbour *n;

2358 2359 2360
		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
L
Linus Torvalds 已提交
2361 2362
			cb(n, cookie);
	}
2363 2364
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
2365 2366 2367 2368 2369 2370 2371 2372
}
EXPORT_SYMBOL(neigh_for_each);

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
2373
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
2374

2375 2376
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
2377
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2378 2379
		struct neighbour *n;
		struct neighbour __rcu **np;
L
Linus Torvalds 已提交
2380

2381
		np = &nht->hash_buckets[chain];
2382 2383
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
L
Linus Torvalds 已提交
2384 2385 2386 2387 2388
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
2389 2390 2391
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
L
Linus Torvalds 已提交
2392 2393 2394 2395
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
2396 2397
			if (release)
				neigh_cleanup_and_release(n);
L
Linus Torvalds 已提交
2398 2399 2400 2401 2402 2403 2404 2405 2406 2407
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);

#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
2408
	struct net *net = seq_file_net(seq);
2409
	struct neigh_hash_table *nht = state->nht;
L
Linus Torvalds 已提交
2410 2411 2412 2413
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2414
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2415
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
L
Linus Torvalds 已提交
2416 2417

		while (n) {
2418
			if (!net_eq(dev_net(n->dev), net))
2419
				goto next;
L
Linus Torvalds 已提交
2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
2432 2433
next:
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

/*
 * Advance the seq_file iterator from @n to the next visible neighbour
 * entry, applying the same filters as neigh_get_first().
 *
 * If a sub-iterator is installed and still yields something for the
 * current entry, @n itself is returned.  Otherwise the rest of the chain
 * and then the following buckets (tracked in state->bucket) are searched.
 * When an entry is found by that search and @pos is non-NULL, *@pos is
 * decremented.
 *
 * Returns the entry, or NULL when the table is exhausted.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}

/*
 * Seek to the neighbour entry at 1-based position *@pos by starting at the
 * first entry and stepping forward.  *@pos is counted down as entries are
 * consumed.
 *
 * Returns the entry when *@pos reaches zero, otherwise NULL with *@pos
 * holding the count that could not be satisfied.
 */
static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (n) {
		--(*pos);
		while (*pos) {
			n = neigh_get_next(seq, n, pos);
			if (!n)
				break;
		}
	}
	return *pos ? NULL : n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
2510
	struct net *net = seq_file_net(seq);
L
Linus Torvalds 已提交
2511 2512 2513 2514 2515 2516 2517
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
2518
		while (pn && !net_eq(pneigh_net(pn), net))
2519
			pn = pn->next;
L
Linus Torvalds 已提交
2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

/*
 * Advance the seq_file iterator from @pn to the next proxy entry in the
 * file's namespace, moving on to later buckets (tracked in state->bucket)
 * when the current chain is exhausted.
 *
 * When an entry is found and @pos is non-NULL, *@pos is decremented.
 * Returns the entry, or NULL when all buckets are exhausted.
 */
static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

/*
 * Seek to the proxy entry at 1-based position *@pos by starting at the
 * first proxy entry and stepping forward.  *@pos is counted down as entries
 * are consumed.
 *
 * Returns the entry when *@pos reaches zero, otherwise NULL.
 */
static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (pn) {
		--(*pos);
		while (*pos) {
			pn = pneigh_get_next(seq, pn, pos);
			if (!pn)
				break;
		}
	}
	return *pos ? NULL : pn;
}

/*
 * Seek to position *@pos across the regular entries and, unless
 * NEIGH_SEQ_NEIGH_ONLY is set, the proxy entries that follow them.
 *
 * Works on a local copy of *@pos, so the caller's value is not modified;
 * whatever count the neighbour search leaves over is carried into the
 * proxy search.
 *
 * Returns the entry found, or NULL.
 */
static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;
	loff_t idxpos = *pos;

	rc = neigh_get_idx(seq, &idxpos);
	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
		rc = pneigh_get_idx(seq, &idxpos);

	return rc;
}

/*
 * seq_file ->start helper for neighbour tables.
 *
 * Initialises the iterator state for @tbl (NEIGH_SEQ_IS_PNEIGH is always
 * cleared from @neigh_seq_flags), enters an rcu_bh read-side section and
 * snapshots the current hash table into state->nht.
 *
 * Returns SEQ_START_TOKEN for position 0, otherwise the entry at *@pos or
 * NULL when the position is past the end.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);

/*
 * seq_file ->next helper for neighbour tables.
 *
 * From SEQ_START_TOKEN the first neighbour entry is returned.  Otherwise
 * the iterator advances within the regular entries and, once those are
 * exhausted and NEIGH_SEQ_NEIGH_ONLY is not set, continues with the proxy
 * entries.  *@pos is incremented on every call.
 *
 * Returns the next entry, or NULL at the end.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		/* Proxy iteration is never entered in neigh-only mode. */
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
2628
	__releases(rcu_bh)
L
Linus Torvalds 已提交
2629
{
2630
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
2631 2632 2633 2634 2635 2636 2637
}
EXPORT_SYMBOL(neigh_seq_stop);

/* statistics via seq_file */

/*
 * seq_file ->start for the per-CPU statistics file.
 *
 * Position 0 yields SEQ_START_TOKEN (the header line).  Position k > 0
 * maps to CPU k - 1; impossible CPUs are skipped and *@pos is moved to one
 * past the CPU actually returned.
 *
 * Returns that CPU's statistics block, or NULL when no possible CPU is
 * left.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

/*
 * seq_file ->next for the per-CPU statistics file.
 *
 * Scans forward from CPU *@pos, skipping impossible CPUs, and sets *@pos
 * to one past the CPU returned.
 *
 * Returns that CPU's statistics block, or NULL when no possible CPU is
 * left.
 */
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

/* seq_file ->stop for the statistics file; nothing to release. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
A
Alexey Dobriyan 已提交
2674
	struct neigh_table *tbl = seq->private;
L
Linus Torvalds 已提交
2675 2676 2677
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
2678
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
L
Linus Torvalds 已提交
2679 2680 2681 2682
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2683
			"%08lx %08lx  %08lx %08lx %08lx\n",
L
Linus Torvalds 已提交
2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
2699 2700
		   st->forced_gc_runs,
		   st->unres_discards
L
Linus Torvalds 已提交
2701 2702 2703 2704 2705
		   );

	return 0;
}

2706
static const struct seq_operations neigh_stat_seq_ops = {
L
Linus Torvalds 已提交
2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};

static int neigh_stat_seq_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &neigh_stat_seq_ops);

	if (!ret) {
		struct seq_file *sf = file->private_data;
A
Alexey Dobriyan 已提交
2719
		sf->private = PDE(inode)->data;
L
Linus Torvalds 已提交
2720 2721 2722 2723
	}
	return ret;
};

2724
static const struct file_operations neigh_stat_seq_fops = {
L
Linus Torvalds 已提交
2725 2726 2727 2728 2729 2730 2731 2732 2733
	.owner	 = THIS_MODULE,
	.open 	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#endif /* CONFIG_PROC_FS */

2734 2735 2736 2737 2738 2739 2740 2741 2742
/*
 * Upper bound on the size of a neighbour notification: the aligned ndmsg
 * header plus room for each attribute such a message may carry.
 */
static inline size_t neigh_nlmsg_size(void)
{
	size_t len = NLMSG_ALIGN(sizeof(struct ndmsg));

	len += nla_total_size(MAX_ADDR_LEN);			/* NDA_DST */
	len += nla_total_size(MAX_ADDR_LEN);			/* NDA_LLADDR */
	len += nla_total_size(sizeof(struct nda_cacheinfo));	/* NDA_CACHEINFO */
	len += nla_total_size(4);				/* NDA_PROBES */

	return len;
}

2743
static void __neigh_notify(struct neighbour *n, int type, int flags)
L
Linus Torvalds 已提交
2744
{
2745
	struct net *net = dev_net(n->dev);
2746
	struct sk_buff *skb;
2747
	int err = -ENOBUFS;
L
Linus Torvalds 已提交
2748

2749
	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2750
	if (skb == NULL)
2751
		goto errout;
L
Linus Torvalds 已提交
2752

2753
	err = neigh_fill_info(skb, n, 0, 0, type, flags);
2754 2755 2756 2757 2758 2759
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
2760 2761
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
2762 2763
errout:
	if (err < 0)
2764
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
L
Linus Torvalds 已提交
2765 2766
}

T
Thomas Graf 已提交
2767
#ifdef CONFIG_ARPD
2768
void neigh_app_ns(struct neighbour *n)
L
Linus Torvalds 已提交
2769
{
2770 2771
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
2772
EXPORT_SYMBOL(neigh_app_ns);
L
Linus Torvalds 已提交
2773 2774 2775
#endif /* CONFIG_ARPD */

#ifdef CONFIG_SYSCTL
2776 2777
/* Lower bound (0) referenced through .extra1 by the sysctl entries below. */
static int zero;
/*
 * Largest packet count whose product with SKB_TRUESIZE(ETH_FRAME_LEN)
 * still fits in an int; used as the upper bound in proc_unres_qlen().
 */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
L
Linus Torvalds 已提交
2778

E
Eric Dumazet 已提交
2779 2780 2781 2782 2783 2784
/*
 * sysctl handler that exposes a byte-denominated queue limit as a packet
 * count.
 *
 * The stored value (ctl->data, in bytes) is divided by
 * SKB_TRUESIZE(ETH_FRAME_LEN) for presentation.  On a successful write the
 * packet count, bounded to [0, unres_qlen_max] by proc_dointvec_minmax(),
 * is multiplied back into bytes and stored.
 *
 * Returns the result of proc_dointvec_minmax().
 */
static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
			   size_t *lenp, loff_t *ppos)
{
	int size, ret;
	ctl_table tmp = *ctl;

	/* Operate on a local copy so the shared table entry stays intact. */
	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}

/*
 * Indices into neigh_sysctl_template.neigh_vars[].  NEIGH_VAR_MAX is the
 * number of real entries; the array is declared one larger to hold the
 * empty terminator.
 */
enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
2819

L
Linus Torvalds 已提交
2820 2821
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
E
Eric Dumazet 已提交
2822
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2823
} neigh_sysctl_template __read_mostly = {
L
Linus Torvalds 已提交
2824
	.neigh_vars = {
E
Eric Dumazet 已提交
2825
		[NEIGH_VAR_MCAST_PROBE] = {
L
Linus Torvalds 已提交
2826 2827 2828
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2829
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2830
		},
E
Eric Dumazet 已提交
2831
		[NEIGH_VAR_UCAST_PROBE] = {
L
Linus Torvalds 已提交
2832 2833 2834
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2835
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2836
		},
E
Eric Dumazet 已提交
2837
		[NEIGH_VAR_APP_PROBE] = {
L
Linus Torvalds 已提交
2838 2839 2840
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2841
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2842
		},
E
Eric Dumazet 已提交
2843
		[NEIGH_VAR_RETRANS_TIME] = {
L
Linus Torvalds 已提交
2844 2845 2846
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2847
			.proc_handler	= proc_dointvec_userhz_jiffies,
L
Linus Torvalds 已提交
2848
		},
E
Eric Dumazet 已提交
2849
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
L
Linus Torvalds 已提交
2850 2851 2852
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2853
			.proc_handler	= proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2854
		},
E
Eric Dumazet 已提交
2855
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
L
Linus Torvalds 已提交
2856 2857 2858
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2859
			.proc_handler	= proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2860
		},
E
Eric Dumazet 已提交
2861
		[NEIGH_VAR_GC_STALETIME] = {
L
Linus Torvalds 已提交
2862 2863 2864
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2865
			.proc_handler	= proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2866
		},
E
Eric Dumazet 已提交
2867
		[NEIGH_VAR_QUEUE_LEN] = {
L
Linus Torvalds 已提交
2868 2869 2870
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
E
Eric Dumazet 已提交
2871 2872 2873 2874 2875 2876
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
2877 2878
			.extra1		= &zero,
			.proc_handler   = proc_dointvec_minmax,
L
Linus Torvalds 已提交
2879
		},
E
Eric Dumazet 已提交
2880
		[NEIGH_VAR_PROXY_QLEN] = {
L
Linus Torvalds 已提交
2881 2882 2883
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2884
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2885
		},
E
Eric Dumazet 已提交
2886
		[NEIGH_VAR_ANYCAST_DELAY] = {
L
Linus Torvalds 已提交
2887 2888 2889
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2890
			.proc_handler	= proc_dointvec_userhz_jiffies,
L
Linus Torvalds 已提交
2891
		},
E
Eric Dumazet 已提交
2892
		[NEIGH_VAR_PROXY_DELAY] = {
L
Linus Torvalds 已提交
2893 2894 2895
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2896
			.proc_handler	= proc_dointvec_userhz_jiffies,
L
Linus Torvalds 已提交
2897
		},
E
Eric Dumazet 已提交
2898
		[NEIGH_VAR_LOCKTIME] = {
L
Linus Torvalds 已提交
2899 2900 2901
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2902
			.proc_handler	= proc_dointvec_userhz_jiffies,
L
Linus Torvalds 已提交
2903
		},
E
Eric Dumazet 已提交
2904
		[NEIGH_VAR_RETRANS_TIME_MS] = {
2905 2906 2907
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2908
			.proc_handler	= proc_dointvec_ms_jiffies,
2909
		},
E
Eric Dumazet 已提交
2910
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2911 2912 2913
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2914
			.proc_handler	= proc_dointvec_ms_jiffies,
2915
		},
E
Eric Dumazet 已提交
2916
		[NEIGH_VAR_GC_INTERVAL] = {
L
Linus Torvalds 已提交
2917 2918 2919
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2920
			.proc_handler	= proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2921
		},
E
Eric Dumazet 已提交
2922
		[NEIGH_VAR_GC_THRESH1] = {
L
Linus Torvalds 已提交
2923 2924 2925
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2926
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2927
		},
E
Eric Dumazet 已提交
2928
		[NEIGH_VAR_GC_THRESH2] = {
L
Linus Torvalds 已提交
2929 2930 2931
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2932
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2933
		},
E
Eric Dumazet 已提交
2934
		[NEIGH_VAR_GC_THRESH3] = {
L
Linus Torvalds 已提交
2935 2936 2937
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2938
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2939
		},
2940
		{},
L
Linus Torvalds 已提交
2941 2942 2943 2944
	},
};

/**
 * neigh_sysctl_register - expose one set of neighbour parameters via sysctl
 * @dev: device the parameters belong to, or NULL for the "default" entry
 * @p: neighbour parameters backing the sysctl entries
 * @p_name: protocol component of the path "net/<p_name>/neigh/<dev>"
 * @handler: optional proc handler that replaces the stock handler of the
 *           retransmit-time and reachable-time entries (both the plain and
 *           the millisecond variants)
 *
 * Duplicates neigh_sysctl_template, points the copy's entries at the fields
 * of @p, registers it and stores it in @p->sysctl_table.
 *
 * Returns 0 on success, or -ENOBUFS when the table cannot be allocated or
 * registered.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/*
	 * Bind the private copy to this parms.  Note that the two queue
	 * length entries share queue_len_bytes, and that each *_MS entry
	 * shares its storage with the corresponding non-MS entry.
	 */
	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes;
	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes;
	t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time;
	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time;
	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen;
	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay;
	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/*
		 * Terminate the table early: zeroing the GC_INTERVAL slot
		 * hides it and every entry after it for per-device tables.
		 */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		dev_name_source = "default";
		/*
		 * The GC knobs are read from the four ints located directly
		 * after *p.  NOTE(review): presumably @p is embedded in a
		 * table whose gc_interval/gc_thresh1..3 fields immediately
		 * follow it -- confirm against the structure definition.
		 */
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
		/* RetransTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
	}

	/*
	 * Don't export sysctls to unprivileged users: clearing the first
	 * procname leaves a table with no visible entries.
	 */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		 p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
L
Linus Torvalds 已提交
3020 3021 3022 3023 3024 3025

void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
3026
		unregister_net_sysctl_table(t->sysctl_header);
L
Linus Torvalds 已提交
3027 3028 3029
		kfree(t);
	}
}
3030
EXPORT_SYMBOL(neigh_sysctl_unregister);
L
Linus Torvalds 已提交
3031 3032 3033

#endif	/* CONFIG_SYSCTL */

3034 3035
/*
 * Hook the neighbour subsystem into rtnetlink for all protocol families
 * (PF_UNSPEC): add/delete/dump of neighbour entries, and dump/set of
 * neighbour tables.  Always returns 0.
 */
static int __init neigh_init(void)
{
	/* Neighbour entries. */
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	/* Neighbour tables. */
	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);