neighbour.c 70.6 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

18
#include <linux/slab.h>
L
Linus Torvalds 已提交
19 20 21 22 23 24 25 26 27 28
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
29
#include <net/net_namespace.h>
L
Linus Torvalds 已提交
30 31 32
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
33
#include <net/netevent.h>
34
#include <net/netlink.h>
L
Linus Torvalds 已提交
35 36
#include <linux/rtnetlink.h>
#include <linux/random.h>
37
#include <linux/string.h>
38
#include <linux/log2.h>
L
Linus Torvalds 已提交
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58

/*
 * Debug verbosity: 0 keeps both debug levels silent, >= 1 enables
 * NEIGH_PRINTK1, >= 2 additionally enables NEIGH_PRINTK2.
 */
#define NEIGH_DEBUG 1

/* NEIGH_PRINTK forwards to printk(); NEIGH_NOPRINTK is an empty statement. */
#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
/* Both levels start out disabled and are re-pointed below per NEIGH_DEBUG. */
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

/* Mask applied by pneigh_hash(); proxy buckets are indexed 0..PNEIGH_HASHMASK. */
#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
T
Thomas Graf 已提交
59 60
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
L
Linus Torvalds 已提交
61 62 63
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

static struct neigh_table *neigh_tables;
64
#ifdef CONFIG_PROC_FS
65
static const struct file_operations neigh_stat_seq_fops;
66
#endif
L
Linus Torvalds 已提交
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks, if backend/driver wants to use neighbour
     cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is also used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be made under neigh->lock,
   the most complicated procedure, which we allow is dev->hard_header.
   It is supposed, that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock protecting the
   list of neighbour tables. This list is used only in process context.
 */

static DEFINE_RWLOCK(neigh_tbl_lock);

101
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
102 103 104 105 106
{
	kfree_skb(skb);
	return -ENETDOWN;
}

107 108 109 110 111
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

T
Thomas Graf 已提交
112
	__neigh_notify(neigh, RTM_DELNEIGH, 0);
113 114 115
	neigh_release(neigh);
}

L
Linus Torvalds 已提交
116 117 118 119 120 121 122 123
/*
 * Returns a random value in the interval (1/2)*base...(3/2)*base.
 * This corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	/*
	 * base == 0 yields 0, which also keeps the modulo below from
	 * dividing by zero. Otherwise the result is base/2 plus a random
	 * offset in [0, base).
	 */
	return base ? (net_random() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
L
Linus Torvalds 已提交
127 128 129 130 131 132


static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
133
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
134 135 136 137

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
138 139
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
140
	for (i = 0; i < (1 << nht->hash_shift); i++) {
141 142
		struct neighbour *n;
		struct neighbour __rcu **np;
L
Linus Torvalds 已提交
143

144
		np = &nht->hash_buckets[i];
145 146
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
L
Linus Torvalds 已提交
147 148 149 150 151 152 153
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
154 155 156
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
L
Linus Torvalds 已提交
157 158 159
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
160
				neigh_cleanup_and_release(n);
L
Linus Torvalds 已提交
161 162 163 164 165 166 167 168 169 170 171 172 173 174
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

175 176 177 178 179 180 181 182 183 184
/*
 * Arm the entry's timer to fire at @when, taking a reference before the
 * timer is armed. mod_timer() reporting an already-pending timer is
 * treated as a bug and logged with a stack dump.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	int was_pending;

	neigh_hold(n);
	was_pending = mod_timer(&n->timer, when);
	if (unlikely(was_pending)) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

L
Linus Torvalds 已提交
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/*
 * Cancel the entry's timer if its state says one may be running.
 *
 * Returns 1 when a pending timer was actually deleted (a reference on
 * the entry is dropped in that case), 0 otherwise.
 */
static int neigh_del_timer(struct neighbour *n)
{
	if (!(n->nud_state & NUD_IN_TIMER))
		return 0;
	if (!del_timer(&n->timer))
		return 0;

	neigh_release(n);
	return 1;
}

/*
 * Drain @list: for every queued packet drop the device reference held
 * through skb->dev, then free the packet.
 */
static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *pkt;

	for (pkt = skb_dequeue(list); pkt != NULL; pkt = skb_dequeue(list)) {
		dev_put(pkt->dev);
		kfree_skb(pkt);
	}
}

205
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
L
Linus Torvalds 已提交
206 207
{
	int i;
208
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
209

210 211 212
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

213
	for (i = 0; i < (1 << nht->hash_shift); i++) {
214 215
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];
L
Linus Torvalds 已提交
216

217 218
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
L
Linus Torvalds 已提交
219 220 221 222
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
223 224 225
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
L
Linus Torvalds 已提交
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				skb_queue_purge(&n->arp_queue);
E
Eric Dumazet 已提交
241
				n->arp_queue_len_bytes = 0;
L
Linus Torvalds 已提交
242 243 244 245 246 247 248 249
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
250
			neigh_cleanup_and_release(n);
L
Linus Torvalds 已提交
251 252
		}
	}
253
}
L
Linus Torvalds 已提交
254

255 256 257 258 259 260
/*
 * Flush all entries of @tbl that belong to @dev (all entries when @dev
 * is NULL), holding tbl->lock for writing with bottom halves disabled.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
262 263 264 265 266

/*
 * Device-down handling for @tbl: flush the neighbour entries and the
 * proxy entries of @dev under tbl->lock, then, outside the lock, stop
 * the proxy timer and purge the proxy queue.
 *
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
L
Linus Torvalds 已提交
275

276
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
L
Linus Torvalds 已提交
277 278 279 280 281 282 283 284 285 286 287 288 289 290
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

291 292 293 294 295 296 297 298 299
	if (tbl->entry_size)
		n = kzalloc(tbl->entry_size, GFP_ATOMIC);
	else {
		int sz = sizeof(*n) + tbl->key_len;

		sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
		sz += dev->neigh_priv_len;
		n = kzalloc(sz, GFP_ATOMIC);
	}
L
Linus Torvalds 已提交
300 301 302 303 304
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
305
	seqlock_init(&n->ha_lock);
L
Linus Torvalds 已提交
306 307 308
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
309
	seqlock_init(&n->hh.hh_lock);
L
Linus Torvalds 已提交
310
	n->parms	  = neigh_parms_clone(&tbl->parms);
311
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
L
Linus Torvalds 已提交
312 313 314 315 316 317 318 319 320 321 322 323 324

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

325
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
L
Linus Torvalds 已提交
326
{
327
	size_t size = (1 << shift) * sizeof(struct neighbour *);
328
	struct neigh_hash_table *ret;
E
Eric Dumazet 已提交
329
	struct neighbour __rcu **buckets;
L
Linus Torvalds 已提交
330

331 332 333 334 335 336
	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
E
Eric Dumazet 已提交
337
		buckets = (struct neighbour __rcu **)
338 339 340 341 342
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
L
Linus Torvalds 已提交
343
	}
E
Eric Dumazet 已提交
344
	ret->hash_buckets = buckets;
345
	ret->hash_shift = shift;
346
	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
347
	ret->hash_rnd |= 1;
L
Linus Torvalds 已提交
348 349 350
	return ret;
}

351
static void neigh_hash_free_rcu(struct rcu_head *head)
L
Linus Torvalds 已提交
352
{
353 354 355
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
356
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
E
Eric Dumazet 已提交
357
	struct neighbour __rcu **buckets = nht->hash_buckets;
L
Linus Torvalds 已提交
358 359

	if (size <= PAGE_SIZE)
360
		kfree(buckets);
L
Linus Torvalds 已提交
361
	else
362 363
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
L
Linus Torvalds 已提交
364 365
}

366
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
367
						unsigned long new_shift)
L
Linus Torvalds 已提交
368
{
369 370
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;
L
Linus Torvalds 已提交
371 372 373

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

374 375
	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
376
	new_nht = neigh_hash_alloc(new_shift);
377 378
	if (!new_nht)
		return old_nht;
L
Linus Torvalds 已提交
379

380
	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
L
Linus Torvalds 已提交
381 382
		struct neighbour *n, *next;

383 384
		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
385 386 387 388
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);
L
Linus Torvalds 已提交
389

390
			hash >>= (32 - new_nht->hash_shift);
391 392 393 394 395 396 397 398
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
L
Linus Torvalds 已提交
399 400 401
		}
	}

402 403 404
	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
L
Linus Torvalds 已提交
405 406 407 408 409 410 411
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
412
	u32 hash_val;
413
	struct neigh_hash_table *nht;
414

L
Linus Torvalds 已提交
415 416
	NEIGH_CACHE_STAT_INC(tbl, lookups);

417 418
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
419
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
420 421 422 423

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
L
Linus Torvalds 已提交
424
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
425 426
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
L
Linus Torvalds 已提交
427 428 429 430
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}
431

432
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
433 434
	return n;
}
435
EXPORT_SYMBOL(neigh_lookup);
L
Linus Torvalds 已提交
436

437 438
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
L
Linus Torvalds 已提交
439 440 441
{
	struct neighbour *n;
	int key_len = tbl->key_len;
442
	u32 hash_val;
443
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
444 445 446

	NEIGH_CACHE_STAT_INC(tbl, lookups);

447 448
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
449
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
450 451 452 453

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
454
		if (!memcmp(n->primary_key, pkey, key_len) &&
455
		    net_eq(dev_net(n->dev), net)) {
456 457
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
L
Linus Torvalds 已提交
458 459 460 461
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}
462

463
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
464 465
	return n;
}
466
EXPORT_SYMBOL(neigh_lookup_nodev);
L
Linus Torvalds 已提交
467 468 469 470 471 472 473

struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
474
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
475
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
502 503
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
L
Linus Torvalds 已提交
504

505 506
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
L
Linus Torvalds 已提交
507

508
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
L
Linus Torvalds 已提交
509 510 511 512 513 514

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

515 516 517 518 519
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
L
Linus Torvalds 已提交
520 521 522 523 524 525 526 527 528
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	neigh_hold(n);
529 530 531 532
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
L
Linus Torvalds 已提交
533 534 535 536 537 538 539 540 541 542 543
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
544
EXPORT_SYMBOL(neigh_create);
L
Linus Torvalds 已提交
545

546
static u32 pneigh_hash(const void *pkey, int key_len)
547 548 549 550 551 552
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
553 554
	return hash_val;
}
555

556 557 558 559 560 561 562
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
563
		if (!memcmp(n->key, pkey, key_len) &&
564
		    net_eq(pneigh_net(n), net) &&
565
		    (n->dev == dev || !n->dev))
566 567
			return n;
		n = n->next;
568
	}
569 570
	return NULL;
}
571

572 573 574 575 576 577 578 579
/*
 * Proxy-entry lookup that takes no lock itself: hashes @pkey and
 * searches the corresponding bucket of @tbl.
 *
 * Returns the matching entry or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
582

583 584
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
L
Linus Torvalds 已提交
585 586 587 588
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
589
	u32 hash_val = pneigh_hash(pkey, key_len);
L
Linus Torvalds 已提交
590 591

	read_lock_bh(&tbl->lock);
592 593
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
L
Linus Torvalds 已提交
594
	read_unlock_bh(&tbl->lock);
595 596

	if (n || !creat)
L
Linus Torvalds 已提交
597 598
		goto out;

599 600
	ASSERT_RTNL();

L
Linus Torvalds 已提交
601 602 603 604
	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

E
Eric Dumazet 已提交
605
	write_pnet(&n->net, hold_net(net));
L
Linus Torvalds 已提交
606 607 608 609 610 611 612 613
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
614
		release_net(net);
L
Linus Torvalds 已提交
615 616 617 618 619 620 621 622 623 624 625 626
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
627
EXPORT_SYMBOL(pneigh_lookup);
L
Linus Torvalds 已提交
628 629


630
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
L
Linus Torvalds 已提交
631 632 633 634
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
635
	u32 hash_val = pneigh_hash(pkey, key_len);
L
Linus Torvalds 已提交
636 637 638 639

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
640
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
641
		    net_eq(pneigh_net(n), net)) {
L
Linus Torvalds 已提交
642 643 644 645 646 647
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
648
			release_net(pneigh_net(n));
L
Linus Torvalds 已提交
649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

/*
 * Remove every proxy entry bound to @dev, or every proxy entry when
 * @dev is NULL. For each removed entry the optional pdestructor is run
 * and the device and namespace references are dropped before freeing.
 *
 * Takes no lock itself; the caller in this file (neigh_ifdown) holds
 * tbl->lock for writing. The return value is -ENOENT unconditionally,
 * regardless of whether anything was removed.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}

681 682 683 684 685 686 687
static void neigh_parms_destroy(struct neigh_parms *parms);

/* Drop one reference on @parms; the last reference destroys it. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (!atomic_dec_and_test(&parms->refcnt))
		return;

	neigh_parms_destroy(parms);
}
L
Linus Torvalds 已提交
688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707

/*
 *	neighbour must already be out of the table;
 *
 */
void neigh_destroy(struct neighbour *neigh)
{
	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		printk(KERN_WARNING
		       "Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		printk(KERN_WARNING "Impossible event.\n");

	skb_queue_purge(&neigh->arp_queue);
E
Eric Dumazet 已提交
708
	neigh->arp_queue_len_bytes = 0;
L
Linus Torvalds 已提交
709 710 711 712 713 714 715

	dev_put(neigh->dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
716
	kfree_rcu(neigh, rcu);
L
Linus Torvalds 已提交
717
}
718
EXPORT_SYMBOL(neigh_destroy);
L
Linus Torvalds 已提交
719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	/* Route packets through the ops' generic output handler. */
	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	/* Route packets through the ops' connected_output handler. */
	neigh->output = neigh->ops->connected_output;
}

744
static void neigh_periodic_work(struct work_struct *work)
L
Linus Torvalds 已提交
745
{
746
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
747 748
	struct neighbour *n;
	struct neighbour __rcu **np;
749
	unsigned int i;
750
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
751 752 753

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

754
	write_lock_bh(&tbl->lock);
755 756
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
L
Linus Torvalds 已提交
757 758 759 760 761

	/*
	 *	periodically recompute ReachableTime from random function
	 */

762
	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
L
Linus Torvalds 已提交
763
		struct neigh_parms *p;
764
		tbl->last_rand = jiffies;
L
Linus Torvalds 已提交
765 766 767 768 769
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

770
	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
771
		np = &nht->hash_buckets[i];
L
Linus Torvalds 已提交
772

773 774
		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
775
			unsigned int state;
L
Linus Torvalds 已提交
776

777
			write_lock(&n->lock);
L
Linus Torvalds 已提交
778

779 780 781 782 783
			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}
L
Linus Torvalds 已提交
784

785 786
			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;
L
Linus Torvalds 已提交
787

788 789 790 791 792 793 794 795 796
			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
L
Linus Torvalds 已提交
797 798 799
			write_unlock(&n->lock);

next_elt:
800 801 802 803 804 805 806 807 808
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
L
Linus Torvalds 已提交
809
	}
810 811 812
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
L
Linus Torvalds 已提交
813
	 */
814 815 816
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
L
Linus Torvalds 已提交
817 818 819 820 821
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
E
Eric Dumazet 已提交
822
	return (n->nud_state & NUD_PROBE) ?
L
Linus Torvalds 已提交
823
		p->ucast_probes :
E
Eric Dumazet 已提交
824
		p->ucast_probes + p->app_probes + p->mcast_probes;
L
Linus Torvalds 已提交
825 826
}

827
static void neigh_invalidate(struct neighbour *neigh)
E
Eric Dumazet 已提交
828 829
	__releases(neigh->lock)
	__acquires(neigh->lock)
830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	skb_queue_purge(&neigh->arp_queue);
E
Eric Dumazet 已提交
849
	neigh->arp_queue_len_bytes = 0;
850 851
}

E
Eric Dumazet 已提交
852 853 854 855 856 857 858 859 860 861 862 863 864
/*
 * Send one solicitation for @neigh.
 *
 * Entered with neigh->lock write-held; the lock is released before the
 * solicit callback runs. The callback receives a private copy of the
 * first queued packet (or NULL if the queue is empty or the copy
 * failed), so the packet stays valid even if the queue overflows
 * meanwhile. The probe counter is incremented afterwards.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *head = skb_peek(&neigh->arp_queue);
	struct sk_buff *copy = NULL;

	if (head != NULL)
		copy = skb_copy(head, GFP_ATOMIC);
	write_unlock(&neigh->lock);

	neigh->ops->solicit(neigh, copy);
	atomic_inc(&neigh->probes);
	kfree_skb(copy);
}

L
Linus Torvalds 已提交
865 866 867 868 869 870 871 872 873 874 875 876 877 878 879
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

880
	if (!(state & NUD_IN_TIMER))
L
Linus Torvalds 已提交
881 882 883
		goto out;

	if (state & NUD_REACHABLE) {
884
		if (time_before_eq(now,
L
Linus Torvalds 已提交
885 886 887 888 889 890 891
				   neigh->confirmed + neigh->parms->reachable_time)) {
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
892
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
893 894 895 896 897
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
898
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
899
			neigh_suspect(neigh);
900
			notify = 1;
L
Linus Torvalds 已提交
901 902
		}
	} else if (state & NUD_DELAY) {
903
		if (time_before_eq(now,
L
Linus Torvalds 已提交
904 905 906
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
907
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
908
			neigh_connect(neigh);
909
			notify = 1;
L
Linus Torvalds 已提交
910 911 912 913
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
914
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
915 916 917 918 919 920 921 922 923 924 925 926
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
927
		neigh_invalidate(neigh);
L
Linus Torvalds 已提交
928 929 930 931 932
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
933 934
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
L
Linus Torvalds 已提交
935 936
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
E
Eric Dumazet 已提交
937
		neigh_probe(neigh);
938
	} else {
939
out:
940 941
		write_unlock(&neigh->lock);
	}
T
Thomas Graf 已提交
942

943
	if (notify)
T
Thomas Graf 已提交
944
		neigh_update_notify(neigh);
L
Linus Torvalds 已提交
945 946 947 948 949 950 951

	neigh_release(neigh);
}

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
E
Eric Dumazet 已提交
952
	bool immediate_probe = false;
L
Linus Torvalds 已提交
953 954 955 956 957 958 959 960 961

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
E
Eric Dumazet 已提交
962 963
			unsigned long next, now = jiffies;

L
Linus Torvalds 已提交
964 965
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
E
Eric Dumazet 已提交
966 967 968 969
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
L
Linus Torvalds 已提交
970 971
		} else {
			neigh->nud_state = NUD_FAILED;
972
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
973 974
			write_unlock_bh(&neigh->lock);

975
			kfree_skb(skb);
L
Linus Torvalds 已提交
976 977 978 979 980
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh->nud_state = NUD_DELAY;
981
		neigh->updated = jiffies;
982 983
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
L
Linus Torvalds 已提交
984 985 986 987
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
E
Eric Dumazet 已提交
988 989
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
L
Linus Torvalds 已提交
990
				struct sk_buff *buff;
E
Eric Dumazet 已提交
991

992
				buff = __skb_dequeue(&neigh->arp_queue);
E
Eric Dumazet 已提交
993 994 995
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
L
Linus Torvalds 已提交
996
				kfree_skb(buff);
997
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
L
Linus Torvalds 已提交
998
			}
E
Eric Dumazet 已提交
999
			skb_dst_force(skb);
L
Linus Torvalds 已提交
1000
			__skb_queue_tail(&neigh->arp_queue, skb);
E
Eric Dumazet 已提交
1001
			neigh->arp_queue_len_bytes += skb->truesize;
L
Linus Torvalds 已提交
1002 1003 1004 1005
		}
		rc = 1;
	}
out_unlock_bh:
E
Eric Dumazet 已提交
1006 1007 1008 1009 1010
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
L
Linus Torvalds 已提交
1011 1012
	return rc;
}
1013
EXPORT_SYMBOL(__neigh_event_send);
L
Linus Torvalds 已提交
1014

1015
static void neigh_update_hhs(struct neighbour *neigh)
L
Linus Torvalds 已提交
1016 1017
{
	struct hh_cache *hh;
1018
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
D
Doug Kehn 已提交
1019 1020 1021 1022
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;
L
Linus Torvalds 已提交
1023 1024

	if (update) {
1025 1026
		hh = &neigh->hh;
		if (hh->hh_len) {
1027
			write_seqlock_bh(&hh->hh_lock);
L
Linus Torvalds 已提交
1028
			update(hh, neigh->dev, neigh->ha);
1029
			write_sequnlock_bh(&hh->hh_lock);
L
Linus Torvalds 已提交
1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042
		}
	}
}



/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows to retain current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Returns 0 on success (including the case where a differing lladdr is
   silently ignored because no override flag was given), -EPERM when a
   non-administrative update targets a NUD_NOARP/NUD_PERMANENT entry,
   and -EINVAL when no lladdr is supplied for an entry that is not yet
   NUD_VALID on a device that needs addresses.

   neigh->lock is taken (write, BH-disabled) for the duration; it is
   dropped temporarily around each n1->output() call while the pending
   arp_queue is flushed.

   Caller MUST hold reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* From here on "lladdr == neigh->ha" means "address unchanged". */

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new address under ha_lock so lockless
		 * readers using read_seqbegin/read_seqretry retry.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
				n1 = n2;
			n1->output(n1, skb);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
L
Linus Torvalds 已提交
1204 1205 1206 1207 1208 1209 1210 1211

/* Look up the entry for @saddr on @dev in @tbl and, if one is returned,
 * update it to NUD_STALE with @lladdr using NEIGH_UPDATE_F_OVERRIDE.
 *
 * The last argument to __neigh_lookup() is true when an lladdr was
 * supplied or the device uses no addresses (presumably the "create if
 * missing" switch -- confirm against __neigh_lookup()).
 *
 * Returns whatever __neigh_lookup() returned (possibly NULL); the result
 * of neigh_update() is ignored.
 */
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
L
Linus Torvalds 已提交
1217

E
Eric Dumazet 已提交
1218
/* called with read_lock_bh(&n->lock); */
1219
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
L
Linus Torvalds 已提交
1220 1221
{
	struct net_device *dev = dst->dev;
1222 1223
	__be16 prot = dst->ops->protocol;
	struct hh_cache	*hh = &n->hh;
1224 1225

	write_lock_bh(&n->lock);
E
Eric Dumazet 已提交
1226

1227 1228 1229
	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
1230 1231
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);
E
Eric Dumazet 已提交
1232

1233
	write_unlock_bh(&n->lock);
L
Linus Torvalds 已提交
1234 1235 1236
}

/* This function can be used in contexts, where only old dev_queue_xmit
1237 1238
 * worked, f.e. if you want to override normal output path (eql, shaper),
 * but resolution is not made yet.
L
Linus Torvalds 已提交
1239 1240
 */

1241
int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
1242 1243 1244
{
	struct net_device *dev = skb->dev;

1245
	__skb_pull(skb, skb_network_offset(skb));
L
Linus Torvalds 已提交
1246

1247 1248
	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
			    skb->len) < 0 &&
1249
	    dev->header_ops->rebuild(skb))
L
Linus Torvalds 已提交
1250 1251 1252 1253
		return 0;

	return dev_queue_xmit(skb);
}
1254
EXPORT_SYMBOL(neigh_compat_output);
L
Linus Torvalds 已提交
1255 1256 1257

/* Slow and careful. */

1258
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
1259
{
E
Eric Dumazet 已提交
1260
	struct dst_entry *dst = skb_dst(skb);
L
Linus Torvalds 已提交
1261 1262
	int rc = 0;

1263
	if (!dst)
L
Linus Torvalds 已提交
1264 1265
		goto discard;

1266
	__skb_pull(skb, skb_network_offset(skb));
L
Linus Torvalds 已提交
1267 1268 1269 1270

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
1271
		unsigned int seq;
E
Eric Dumazet 已提交
1272

1273 1274
		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);
E
Eric Dumazet 已提交
1275

1276 1277 1278 1279 1280
		do {
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));
E
Eric Dumazet 已提交
1281

L
Linus Torvalds 已提交
1282
		if (err >= 0)
1283
			rc = dev_queue_xmit(skb);
L
Linus Torvalds 已提交
1284 1285 1286 1287 1288 1289 1290
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1291
		      dst, neigh);
L
Linus Torvalds 已提交
1292 1293 1294 1295 1296
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
1297
EXPORT_SYMBOL(neigh_resolve_output);
L
Linus Torvalds 已提交
1298 1299 1300

/* As fast as possible without hh cache */

1301
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
1302 1303
{
	struct net_device *dev = neigh->dev;
1304
	unsigned int seq;
1305
	int err;
L
Linus Torvalds 已提交
1306

1307
	__skb_pull(skb, skb_network_offset(skb));
L
Linus Torvalds 已提交
1308

1309 1310 1311 1312 1313 1314
	do {
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

L
Linus Torvalds 已提交
1315
	if (err >= 0)
1316
		err = dev_queue_xmit(skb);
L
Linus Torvalds 已提交
1317 1318 1319 1320 1321 1322
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
1323
EXPORT_SYMBOL(neigh_connected_output);
L
Linus Torvalds 已提交
1324

1325 1326 1327 1328 1329 1330
/* Output method that hands @skb straight to dev_queue_xmit() without
 * building any link-layer header here.  @neigh is unused.  Returns the
 * dev_queue_xmit() result.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

L
Linus Torvalds 已提交
1331 1332 1333 1334 1335
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
1336
	struct sk_buff *skb, *n;
L
Linus Torvalds 已提交
1337 1338 1339

	spin_lock(&tbl->proxy_queue.lock);

1340 1341
	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;
L
Linus Torvalds 已提交
1342 1343

		if (tdif <= 0) {
1344
			struct net_device *dev = skb->dev;
1345

1346
			__skb_unlink(skb, &tbl->proxy_queue);
1347 1348
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
1349
				tbl->proxy_redo(skb);
1350 1351
				rcu_read_unlock();
			} else {
1352
				kfree_skb(skb);
1353
			}
L
Linus Torvalds 已提交
1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}
1375 1376 1377

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
L
Linus Torvalds 已提交
1378 1379 1380 1381 1382 1383

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
E
Eric Dumazet 已提交
1384
	skb_dst_drop(skb);
L
Linus Torvalds 已提交
1385 1386 1387 1388 1389
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1390
EXPORT_SYMBOL(pneigh_enqueue);
L
Linus Torvalds 已提交
1391

1392
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1393 1394 1395 1396 1397
						      struct net *net, int ifindex)
{
	struct neigh_parms *p;

	for (p = &tbl->parms; p; p = p->next) {
1398
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1399 1400 1401 1402 1403 1404
		    (!p->dev && !ifindex))
			return p;
	}

	return NULL;
}
L
Linus Torvalds 已提交
1405 1406 1407 1408

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
1409
	struct neigh_parms *p, *ref;
1410 1411
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;
1412

1413
	ref = lookup_neigh_parms(tbl, net, 0);
1414 1415
	if (!ref)
		return NULL;
L
Linus Torvalds 已提交
1416

1417
	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
L
Linus Torvalds 已提交
1418 1419 1420 1421 1422
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
1423

1424
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1425 1426
			kfree(p);
			return NULL;
L
Linus Torvalds 已提交
1427
		}
1428 1429 1430

		dev_hold(dev);
		p->dev = dev;
E
Eric Dumazet 已提交
1431
		write_pnet(&p->net, hold_net(net));
L
Linus Torvalds 已提交
1432 1433 1434 1435 1436 1437 1438 1439
		p->sysctl_table = NULL;
		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
1440
EXPORT_SYMBOL(neigh_parms_alloc);
L
Linus Torvalds 已提交
1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461

/* RCU callback: recover the neigh_parms embedding @head (via its
 * rcu_head member) and drop one reference on it.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	neigh_parms_put(container_of(head, struct neigh_parms, rcu_head));
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
1462 1463
			if (parms->dev)
				dev_put(parms->dev);
L
Linus Torvalds 已提交
1464 1465 1466 1467 1468 1469 1470
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
1471
EXPORT_SYMBOL(neigh_parms_release);
L
Linus Torvalds 已提交
1472

1473
/* Final teardown of @parms: release its network-namespace reference and
 * free the structure.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}

1479 1480
static struct lock_class_key neigh_table_proxy_queue_class;

1481
void neigh_table_init_no_netlink(struct neigh_table *tbl)
L
Linus Torvalds 已提交
1482 1483 1484 1485
{
	unsigned long now = jiffies;
	unsigned long phsize;

E
Eric Dumazet 已提交
1486
	write_pnet(&tbl->parms.net, &init_net);
L
Linus Torvalds 已提交
1487 1488 1489 1490 1491 1492 1493
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");
1494

L
Linus Torvalds 已提交
1495
#ifdef CONFIG_PROC_FS
1496 1497
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
L
Linus Torvalds 已提交
1498 1499 1500
		panic("cannot create neighbour proc dir entry");
#endif

1501
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
L
Linus Torvalds 已提交
1502 1503

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
A
Andrew Morton 已提交
1504
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
L
Linus Torvalds 已提交
1505

1506
	if (!tbl->nht || !tbl->phash_buckets)
L
Linus Torvalds 已提交
1507 1508 1509
		panic("cannot allocate neighbour cache hashes");

	rwlock_init(&tbl->lock);
1510 1511
	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1512
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1513 1514
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);
L
Linus Torvalds 已提交
1515 1516 1517

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1518
}
1519
EXPORT_SYMBOL(neigh_table_init_no_netlink);
1520 1521 1522 1523 1524 1525

void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
L
Linus Torvalds 已提交
1526
	write_lock(&neigh_tbl_lock);
1527 1528 1529 1530
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
L
Linus Torvalds 已提交
1531 1532 1533
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);
1534 1535 1536 1537 1538 1539

	if (unlikely(tmp)) {
		printk(KERN_ERR "NEIGH: Registering multiple tables for "
		       "family %d\n", tbl->family);
		dump_stack();
	}
L
Linus Torvalds 已提交
1540
}
1541
EXPORT_SYMBOL(neigh_table_init);
L
Linus Torvalds 已提交
1542 1543 1544 1545 1546 1547

int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
1548
	cancel_delayed_work_sync(&tbl->gc_work);
L
Linus Torvalds 已提交
1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		printk(KERN_CRIT "neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

E
Eric Dumazet 已提交
1563 1564
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
1565
	tbl->nht = NULL;
L
Linus Torvalds 已提交
1566 1567 1568 1569

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

1570 1571
	remove_proc_entry(tbl->id, init_net.proc_net_stat);

1572 1573 1574
	free_percpu(tbl->stats);
	tbl->stats = NULL;

L
Linus Torvalds 已提交
1575 1576
	return 0;
}
1577
EXPORT_SYMBOL(neigh_table_clear);
L
Linus Torvalds 已提交
1578

1579
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
L
Linus Torvalds 已提交
1580
{
1581
	struct net *net = sock_net(skb->sk);
1582 1583
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
L
Linus Torvalds 已提交
1584 1585
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
1586
	int err = -EINVAL;
L
Linus Torvalds 已提交
1587

1588
	ASSERT_RTNL();
1589
	if (nlmsg_len(nlh) < sizeof(*ndm))
L
Linus Torvalds 已提交
1590 1591
		goto out;

1592 1593 1594 1595 1596 1597
	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
1598
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1599 1600 1601 1602 1603 1604
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

L
Linus Torvalds 已提交
1605 1606
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1607
		struct neighbour *neigh;
L
Linus Torvalds 已提交
1608 1609 1610 1611 1612

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

1613
		if (nla_len(dst_attr) < tbl->key_len)
1614
			goto out;
L
Linus Torvalds 已提交
1615 1616

		if (ndm->ndm_flags & NTF_PROXY) {
1617
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1618
			goto out;
L
Linus Torvalds 已提交
1619 1620
		}

1621
		if (dev == NULL)
1622
			goto out;
L
Linus Torvalds 已提交
1623

1624 1625 1626
		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
1627
			goto out;
L
Linus Torvalds 已提交
1628
		}
1629 1630 1631 1632 1633

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
1634
		goto out;
L
Linus Torvalds 已提交
1635 1636
	}
	read_unlock(&neigh_tbl_lock);
1637 1638
	err = -EAFNOSUPPORT;

L
Linus Torvalds 已提交
1639 1640 1641 1642
out:
	return err;
}

1643
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
L
Linus Torvalds 已提交
1644
{
1645
	struct net *net = sock_net(skb->sk);
1646 1647
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
L
Linus Torvalds 已提交
1648 1649
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
1650
	int err;
L
Linus Torvalds 已提交
1651

1652
	ASSERT_RTNL();
1653 1654
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
L
Linus Torvalds 已提交
1655 1656
		goto out;

1657 1658 1659 1660 1661 1662
	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
1663
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1664 1665 1666 1667 1668 1669
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1670
			goto out;
1671 1672
	}

L
Linus Torvalds 已提交
1673 1674
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1675 1676 1677
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;
L
Linus Torvalds 已提交
1678 1679 1680 1681 1682

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

1683
		if (nla_len(tb[NDA_DST]) < tbl->key_len)
1684
			goto out;
1685 1686
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
L
Linus Torvalds 已提交
1687 1688

		if (ndm->ndm_flags & NTF_PROXY) {
1689 1690 1691
			struct pneigh_entry *pn;

			err = -ENOBUFS;
1692
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
1693 1694 1695 1696
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
1697
			goto out;
L
Linus Torvalds 已提交
1698 1699
		}

1700
		if (dev == NULL)
1701
			goto out;
1702 1703 1704 1705 1706

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
1707
				goto out;
1708
			}
1709

1710 1711 1712
			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
1713
				goto out;
L
Linus Torvalds 已提交
1714 1715
			}
		} else {
1716 1717 1718
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
1719
				goto out;
L
Linus Torvalds 已提交
1720 1721
			}

1722 1723 1724
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}
L
Linus Torvalds 已提交
1725

1726 1727 1728 1729 1730
		if (ndm->ndm_flags & NTF_USE) {
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1731
		neigh_release(neigh);
1732
		goto out;
L
Linus Torvalds 已提交
1733 1734 1735
	}

	read_unlock(&neigh_tbl_lock);
1736
	err = -EAFNOSUPPORT;
L
Linus Torvalds 已提交
1737 1738 1739 1740
out:
	return err;
}

1741 1742
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
1743 1744 1745 1746 1747
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;
1748 1749

	if (parms->dev)
1750 1751 1752
		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);

	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
E
Eric Dumazet 已提交
1753 1754 1755 1756 1757
	NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
	/* approximative value for deprecated QUEUE_LEN (in packets) */
	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
		    DIV_ROUND_UP(parms->queue_len_bytes,
				 SKB_TRUESIZE(ETH_FRAME_LEN)));
1758 1759 1760 1761 1762 1763
	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1764
		      parms->base_reachable_time);
1765 1766 1767 1768 1769 1770
	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1771

1772
	return nla_nest_end(skb, nest);
1773

1774
nla_put_failure:
1775 1776
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
1777 1778
}

1779 1780
/* Build one netlink message describing table @tbl as a whole: name, GC
 * interval and thresholds, an NDTA_CONFIG snapshot, NDTA_STATS summed
 * over all possible CPUs, and the table's default parms.
 *
 * tbl->lock is held for reading while the message is filled.  The
 * default parms must not be bound to a device (BUG_ON).
 *
 * Returns the nlmsg_end() result on success or -EMSGSIZE (message
 * cancelled) when @skb has no room.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);

	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash table pointer is RCU-managed; sample it under
		 * the BH flavour of the RCU read lock.
		 */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd;
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

1863 1864
/* Build one netlink message carrying only the table name and a single
 * parameter set @parms of @tbl.
 *
 * tbl->lock is held for reading while the message is filled.  Returns
 * the nlmsg_end() result on success or -EMSGSIZE (message cancelled)
 * when @skb has no room.
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1894

1895
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1896 1897 1898 1899 1900 1901 1902 1903
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

1904
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};

1920
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1921
{
1922
	struct net *net = sock_net(skb->sk);
1923
	struct neigh_table *tbl;
1924 1925 1926
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;
1927

1928 1929 1930 1931
	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;
1932

1933 1934 1935 1936 1937 1938
	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
1939 1940 1941 1942 1943
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

1944
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1945 1946 1947 1948 1949
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
1950
		goto errout_locked;
1951 1952
	}

1953
	/*
1954 1955 1956 1957 1958
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

1959 1960
	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
1961
		struct neigh_parms *p;
1962
		int i, ifindex = 0;
1963

1964 1965 1966 1967
		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;
1968

1969 1970
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1971

1972
		p = lookup_neigh_parms(tbl, net, ifindex);
1973 1974
		if (p == NULL) {
			err = -ENOENT;
1975
			goto errout_tbl_lock;
1976 1977
		}

1978 1979 1980
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;
1981

1982 1983
			switch (i) {
			case NDTPA_QUEUE_LEN:
E
Eric Dumazet 已提交
1984 1985 1986 1987 1988
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}
2026

2027 2028
	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2029

2030 2031
	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2032

2033 2034
	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2035

2036 2037
	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2038 2039 2040

	err = 0;

2041
errout_tbl_lock:
2042
	write_unlock_bh(&tbl->lock);
2043
errout_locked:
2044
	read_unlock(&neigh_tbl_lock);
2045
errout:
2046 2047 2048
	return err;
}

2049
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2050
{
2051
	struct net *net = sock_net(skb->sk);
2052 2053 2054
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
2055 2056
	struct neigh_table *tbl;

2057
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2058 2059

	read_lock(&neigh_tbl_lock);
2060
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2061 2062
		struct neigh_parms *p;

2063
		if (tidx < tbl_skip || (family && tbl->family != family))
2064 2065
			continue;

2066 2067 2068
		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
2069 2070
			break;

2071
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2072
			if (!net_eq(neigh_parms_net(p), net))
2073 2074
				continue;

2075 2076
			if (nidx < neigh_skip)
				goto next;
2077

2078 2079 2080 2081 2082
			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
2083
				goto out;
2084 2085
		next:
			nidx++;
2086 2087
		}

2088
		neigh_skip = 0;
2089 2090 2091
	}
out:
	read_unlock(&neigh_tbl_lock);
2092 2093
	cb->args[0] = tidx;
	cb->args[1] = nidx;
2094 2095 2096

	return skb->len;
}
L
Linus Torvalds 已提交
2097

2098 2099
/*
 * Append one netlink message describing @neigh to @skb: an ndmsg header
 * followed by NDA_DST, NDA_LLADDR (only while the entry is NUD_VALID),
 * NDA_PROBES and NDA_CACHEINFO attributes.
 *
 * The state, link-layer address and cache timestamps are sampled under
 * neigh->lock (read side, BH disabled).
 *
 * Returns the value of nlmsg_end() on success, or -EMSGSIZE when the
 * header or an attribute does not fit; in the latter case the partially
 * built message is cancelled.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	const unsigned long now = jiffies;
	struct net_device *dev = neigh->dev;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (!nlh)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family  = neigh->ops->family;
	ndm->ndm_ifindex = dev->ifindex;
	ndm->ndm_type    = neigh->type;
	ndm->ndm_flags   = neigh->flags;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;

	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);

	read_lock_bh(&neigh->lock);
	ndm->ndm_state = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, dev);
		if (nla_put(skb, NDA_LLADDR, dev->addr_len, haddr) < 0)
			goto unlock_failure;
	}

	/* Ages are reported relative to the jiffies value read on entry. */
	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);

	return nlmsg_end(skb, nlh);

unlock_failure:
	read_unlock_bh(&neigh->lock);
nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

T
Thomas Graf 已提交
2148 2149 2150 2151 2152
/*
 * Announce a change of @neigh: first through the netevent notifier chain
 * (NETEVENT_NEIGH_UPDATE), then as an RTM_NEWNEIGH message built by
 * __neigh_notify().
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
L
Linus Torvalds 已提交
2153 2154 2155 2156

static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
2157
	struct net *net = sock_net(skb->sk);
L
Linus Torvalds 已提交
2158 2159 2160
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
2161
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
2162

2163 2164 2165
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

2166
	for (h = 0; h < (1 << nht->hash_shift); h++) {
L
Linus Torvalds 已提交
2167 2168 2169 2170
		if (h < s_h)
			continue;
		if (h > s_h)
			s_idx = 0;
2171 2172 2173
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
O
Octavian Purdila 已提交
2174
			if (!net_eq(dev_net(n->dev), net))
2175
				continue;
2176 2177
			if (idx < s_idx)
				goto next;
L
Linus Torvalds 已提交
2178 2179
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq,
2180 2181
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
L
Linus Torvalds 已提交
2182 2183 2184
				rc = -1;
				goto out;
			}
2185
next:
2186
			idx++;
L
Linus Torvalds 已提交
2187 2188 2189 2190
		}
	}
	rc = skb->len;
out:
2191
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
2192 2193 2194 2195 2196
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}

2197
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
L
Linus Torvalds 已提交
2198 2199 2200 2201 2202
{
	struct neigh_table *tbl;
	int t, family, s_t;

	read_lock(&neigh_tbl_lock);
2203
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
L
Linus Torvalds 已提交
2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223
	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (neigh_dump_table(tbl, skb, cb) < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}

void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
2224
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
2225

2226 2227 2228
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

2229
	read_lock(&tbl->lock); /* avoid resizes */
2230
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
L
Linus Torvalds 已提交
2231 2232
		struct neighbour *n;

2233 2234 2235
		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
L
Linus Torvalds 已提交
2236 2237
			cb(n, cookie);
	}
2238 2239
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
2240 2241 2242 2243 2244 2245 2246 2247
}
EXPORT_SYMBOL(neigh_for_each);

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
2248
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
2249

2250 2251
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
2252
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2253 2254
		struct neighbour *n;
		struct neighbour __rcu **np;
L
Linus Torvalds 已提交
2255

2256
		np = &nht->hash_buckets[chain];
2257 2258
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
L
Linus Torvalds 已提交
2259 2260 2261 2262 2263
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
2264 2265 2266
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
L
Linus Torvalds 已提交
2267 2268 2269 2270
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
2271 2272
			if (release)
				neigh_cleanup_and_release(n);
L
Linus Torvalds 已提交
2273 2274 2275 2276 2277 2278 2279 2280 2281 2282
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);

#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
2283
	struct net *net = seq_file_net(seq);
2284
	struct neigh_hash_table *nht = state->nht;
L
Linus Torvalds 已提交
2285 2286 2287 2288
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2289
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2290
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
L
Linus Torvalds 已提交
2291 2292

		while (n) {
2293
			if (!net_eq(dev_net(n->dev), net))
2294
				goto next;
L
Linus Torvalds 已提交
2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
2307 2308
next:
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

/*
 * Advance the seq_file iterator from @n to the next neighbour entry to
 * show, moving on to later hash buckets (tracked in state->bucket) when
 * the current chain is exhausted.
 *
 * When a sub-iterator is installed it is consulted first for @n itself
 * and then for every candidate; a non-NULL answer returns that entry
 * immediately without touching *@pos here.  Otherwise the same
 * namespace / NEIGH_SEQ_SKIP_NOARP filtering as in neigh_get_first()
 * applies and, if @pos is non-NULL, *@pos is decremented for the entry
 * found.
 *
 * Returns the next entry or NULL at the end of the table.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter &&
	    state->neigh_sub_iter(state, n, pos))
		return n;

	n = rcu_dereference_bh(n->next);

	for (;;) {
		for (; n; n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;

			if (state->neigh_sub_iter) {
				if (state->neigh_sub_iter(state, n, pos))
					return n;
				continue;
			}

			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP) ||
			    (n->nud_state & ~NUD_NOARP))
				break;
		}

		if (n)
			break;

		/* Chain exhausted: move on to the next bucket, if any. */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}

/*
 * Find the neighbour entry at 1-based position *@pos by starting at the
 * first entry and stepping forward until *@pos has counted down to zero.
 *
 * Returns the entry, or NULL when the table has fewer entries; in that
 * case *@pos holds the count that is still outstanding.
 */
static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (!n)
		return NULL;

	for (--(*pos); *pos; ) {
		n = neigh_get_next(seq, n, pos);
		if (!n)
			return NULL;
	}
	return n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
2385
	struct net *net = seq_file_net(seq);
L
Linus Torvalds 已提交
2386 2387 2388 2389 2390 2391 2392
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
2393
		while (pn && !net_eq(pneigh_net(pn), net))
2394
			pn = pn->next;
L
Linus Torvalds 已提交
2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

/*
 * Advance the seq_file iterator from @pn to the next pneigh entry in the
 * reader's namespace, continuing into later buckets (tracked in
 * state->bucket) once the current chain runs out.
 *
 * If an entry is found and @pos is non-NULL, *@pos is decremented.
 * Returns the entry or NULL at the end of the table.
 */
static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	/* Rest of the current chain first. */
	for (pn = pn->next;
	     pn && !net_eq(pneigh_net(pn), net);
	     pn = pn->next)
		;

	/* Then the following buckets, until one yields an entry. */
	while (!pn && ++state->bucket <= PNEIGH_HASHMASK) {
		for (pn = tbl->phash_buckets[state->bucket];
		     pn && !net_eq(pneigh_net(pn), net);
		     pn = pn->next)
			;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

/*
 * Find the pneigh entry at 1-based position *@pos by starting at the
 * first entry and stepping forward until *@pos has counted down to zero.
 *
 * Returns the entry, or NULL when there are fewer entries than that.
 */
static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (!pn)
		return NULL;

	for (--(*pos); *pos; ) {
		pn = pneigh_get_next(seq, pn, pos);
		if (!pn)
			return NULL;
	}
	return pn;
}

/*
 * Locate position *@pos in the combined listing: neighbour entries
 * first and, unless NEIGH_SEQ_NEIGH_ONLY is set, pneigh entries after
 * them.  The search works on a private copy of *@pos, so the caller's
 * value is left untouched.
 *
 * Returns the entry found (struct neighbour or struct pneigh_entry) or
 * NULL.
 */
static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	loff_t idxpos = *pos;
	void *rc = neigh_get_idx(seq, &idxpos);

	if (rc)
		return rc;
	if (state->flags & NEIGH_SEQ_NEIGH_ONLY)
		return NULL;

	/* idxpos now holds what is left after the neighbour entries. */
	return pneigh_get_idx(seq, &idxpos);
}

/*
 * seq_file ->start helper for neighbour tables.
 *
 * Resets the iterator state for @tbl (NEIGH_SEQ_IS_PNEIGH is stripped
 * from @neigh_seq_flags), enters an rcu_read_lock_bh() section, which
 * neigh_seq_stop() leaves again, and snapshots the table's current hash
 * table into state->nht.
 *
 * Returns SEQ_START_TOKEN for position 0, otherwise the entry at *@pos
 * or NULL.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	if (*pos == 0)
		return SEQ_START_TOKEN;

	return neigh_get_idx_any(seq, pos);
}
EXPORT_SYMBOL(neigh_seq_start);

/*
 * seq_file ->next helper for neighbour tables.
 *
 * From SEQ_START_TOKEN the first neighbour entry is returned.  While
 * iterating neighbour entries the next one is returned, falling over to
 * the first pneigh entry when they are exhausted (unless
 * NEIGH_SEQ_NEIGH_ONLY is set).  While iterating pneigh entries the next
 * pneigh entry is returned.
 *
 * *@pos is incremented on every call.  Returns the next element or NULL.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
	} else if (state->flags & NEIGH_SEQ_IS_PNEIGH) {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	} else {
		rc = neigh_get_next(seq, v, NULL);
		if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	}

	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
2503
	__releases(rcu_bh)
L
Linus Torvalds 已提交
2504
{
2505
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
2506 2507 2508 2509 2510 2511 2512
}
EXPORT_SYMBOL(neigh_seq_stop);

/* statistics via seq_file */

/*
 * ->start of the statistics seq_file.
 *
 * Position 0 is the header (SEQ_START_TOKEN); position N > 0 maps to CPU
 * N - 1.  CPUs that are not possible are skipped and *@pos is moved to
 * one past the CPU actually returned.
 *
 * Returns that CPU's struct neigh_statistics, or NULL past the last CPU.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (cpu_possible(cpu)) {
			*pos = cpu+1;
			return per_cpu_ptr(tbl->stats, cpu);
		}
	}
	return NULL;
}

/*
 * ->next of the statistics seq_file: return the statistics of the first
 * possible CPU at or after index *@pos and move *@pos to one past it.
 *
 * Returns NULL once all CPU ids have been visited.
 */
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (cpu_possible(cpu)) {
			*pos = cpu+1;
			return per_cpu_ptr(tbl->stats, cpu);
		}
	}
	return NULL;
}

/* ->stop of the statistics seq_file: nothing to undo. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
A
Alexey Dobriyan 已提交
2549
	struct neigh_table *tbl = seq->private;
L
Linus Torvalds 已提交
2550 2551 2552
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
2553
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
L
Linus Torvalds 已提交
2554 2555 2556 2557
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2558
			"%08lx %08lx  %08lx %08lx %08lx\n",
L
Linus Torvalds 已提交
2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
2574 2575
		   st->forced_gc_runs,
		   st->unres_discards
L
Linus Torvalds 已提交
2576 2577 2578 2579 2580
		   );

	return 0;
}

2581
static const struct seq_operations neigh_stat_seq_ops = {
L
Linus Torvalds 已提交
2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};

static int neigh_stat_seq_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &neigh_stat_seq_ops);

	if (!ret) {
		struct seq_file *sf = file->private_data;
A
Alexey Dobriyan 已提交
2594
		sf->private = PDE(inode)->data;
L
Linus Torvalds 已提交
2595 2596 2597 2598
	}
	return ret;
};

2599
static const struct file_operations neigh_stat_seq_fops = {
L
Linus Torvalds 已提交
2600 2601 2602 2603 2604 2605 2606 2607 2608
	.owner	 = THIS_MODULE,
	.open 	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#endif /* CONFIG_PROC_FS */

2609 2610 2611 2612 2613 2614 2615 2616 2617
/*
 * Upper bound on the size of one neighbour notification: the ndmsg
 * header plus every attribute neigh_fill_info() may emit, with the two
 * address attributes sized for MAX_ADDR_LEN.
 */
static inline size_t neigh_nlmsg_size(void)
{
	size_t len = NLMSG_ALIGN(sizeof(struct ndmsg));

	len += nla_total_size(MAX_ADDR_LEN);			/* NDA_DST */
	len += nla_total_size(MAX_ADDR_LEN);			/* NDA_LLADDR */
	len += nla_total_size(sizeof(struct nda_cacheinfo));	/* NDA_CACHEINFO */
	len += nla_total_size(4);				/* NDA_PROBES */

	return len;
}

2618
static void __neigh_notify(struct neighbour *n, int type, int flags)
L
Linus Torvalds 已提交
2619
{
2620
	struct net *net = dev_net(n->dev);
2621
	struct sk_buff *skb;
2622
	int err = -ENOBUFS;
L
Linus Torvalds 已提交
2623

2624
	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2625
	if (skb == NULL)
2626
		goto errout;
L
Linus Torvalds 已提交
2627

2628
	err = neigh_fill_info(skb, n, 0, 0, type, flags);
2629 2630 2631 2632 2633 2634
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
2635 2636
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
2637 2638
errout:
	if (err < 0)
2639
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
L
Linus Torvalds 已提交
2640 2641
}

T
Thomas Graf 已提交
2642
#ifdef CONFIG_ARPD
2643
void neigh_app_ns(struct neighbour *n)
L
Linus Torvalds 已提交
2644
{
2645 2646
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
2647
EXPORT_SYMBOL(neigh_app_ns);
L
Linus Torvalds 已提交
2648 2649 2650 2651
#endif /* CONFIG_ARPD */

#ifdef CONFIG_SYSCTL

E
Eric Dumazet 已提交
2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687
/*
 * sysctl handler for "unres_qlen": expose the byte-based queue limit
 * stored in ctl->data as a packet count.
 *
 * One packet is accounted as SKB_TRUESIZE(ETH_FRAME_LEN) bytes.  Reads
 * report the byte limit divided by that size, rounded up; writes store
 * the packet count multiplied by it.
 *
 * The written count is restricted to [0, INT_MAX / packet size] so that
 * the multiplication cannot overflow the int behind ctl->data and a
 * negative limit cannot be installed; values outside that range are
 * rejected by proc_dointvec_minmax().
 *
 * Returns the result of proc_dointvec_minmax().
 */
static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
			   size_t *lenp, loff_t *ppos)
{
	int size, ret;
	int qlen_min = 0;
	int qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
	ctl_table tmp = *ctl;

	/* Operate on a private copy so the registered entry stays untouched. */
	tmp.data = &size;
	tmp.extra1 = &qlen_min;
	tmp.extra2 = &qlen_max;

	size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}

/*
 * Indices into neigh_sysctl_table.neigh_vars[].
 *
 * The order is significant: the four GC entries come last so that a
 * per-device table can be cut short by zeroing the NEIGH_VAR_GC_INTERVAL
 * slot, and NEIGH_VAR_MAX is the slot of the terminating empty entry.
 */
enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	/* Entries below are only kept in the table registered without a device. */
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
2688

L
Linus Torvalds 已提交
2689 2690
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
E
Eric Dumazet 已提交
2691
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2692
	char *dev_name;
2693
} neigh_sysctl_template __read_mostly = {
L
Linus Torvalds 已提交
2694
	.neigh_vars = {
E
Eric Dumazet 已提交
2695
		[NEIGH_VAR_MCAST_PROBE] = {
L
Linus Torvalds 已提交
2696 2697 2698
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2699
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2700
		},
E
Eric Dumazet 已提交
2701
		[NEIGH_VAR_UCAST_PROBE] = {
L
Linus Torvalds 已提交
2702 2703 2704
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2705
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2706
		},
E
Eric Dumazet 已提交
2707
		[NEIGH_VAR_APP_PROBE] = {
L
Linus Torvalds 已提交
2708 2709 2710
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2711
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2712
		},
E
Eric Dumazet 已提交
2713
		[NEIGH_VAR_RETRANS_TIME] = {
L
Linus Torvalds 已提交
2714 2715 2716
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2717
			.proc_handler	= proc_dointvec_userhz_jiffies,
L
Linus Torvalds 已提交
2718
		},
E
Eric Dumazet 已提交
2719
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
L
Linus Torvalds 已提交
2720 2721 2722
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2723
			.proc_handler	= proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2724
		},
E
Eric Dumazet 已提交
2725
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
L
Linus Torvalds 已提交
2726 2727 2728
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2729
			.proc_handler	= proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2730
		},
E
Eric Dumazet 已提交
2731
		[NEIGH_VAR_GC_STALETIME] = {
L
Linus Torvalds 已提交
2732 2733 2734
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2735
			.proc_handler	= proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2736
		},
E
Eric Dumazet 已提交
2737
		[NEIGH_VAR_QUEUE_LEN] = {
L
Linus Torvalds 已提交
2738 2739 2740
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
E
Eric Dumazet 已提交
2741 2742 2743 2744 2745 2746
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2747
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2748
		},
E
Eric Dumazet 已提交
2749
		[NEIGH_VAR_PROXY_QLEN] = {
L
Linus Torvalds 已提交
2750 2751 2752
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2753
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2754
		},
E
Eric Dumazet 已提交
2755
		[NEIGH_VAR_ANYCAST_DELAY] = {
L
Linus Torvalds 已提交
2756 2757 2758
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2759
			.proc_handler	= proc_dointvec_userhz_jiffies,
L
Linus Torvalds 已提交
2760
		},
E
Eric Dumazet 已提交
2761
		[NEIGH_VAR_PROXY_DELAY] = {
L
Linus Torvalds 已提交
2762 2763 2764
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2765
			.proc_handler	= proc_dointvec_userhz_jiffies,
L
Linus Torvalds 已提交
2766
		},
E
Eric Dumazet 已提交
2767
		[NEIGH_VAR_LOCKTIME] = {
L
Linus Torvalds 已提交
2768 2769 2770
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2771
			.proc_handler	= proc_dointvec_userhz_jiffies,
L
Linus Torvalds 已提交
2772
		},
E
Eric Dumazet 已提交
2773
		[NEIGH_VAR_RETRANS_TIME_MS] = {
2774 2775 2776
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2777
			.proc_handler	= proc_dointvec_ms_jiffies,
2778
		},
E
Eric Dumazet 已提交
2779
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2780 2781 2782
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2783
			.proc_handler	= proc_dointvec_ms_jiffies,
2784
		},
E
Eric Dumazet 已提交
2785
		[NEIGH_VAR_GC_INTERVAL] = {
L
Linus Torvalds 已提交
2786 2787 2788
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2789
			.proc_handler	= proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2790
		},
E
Eric Dumazet 已提交
2791
		[NEIGH_VAR_GC_THRESH1] = {
L
Linus Torvalds 已提交
2792 2793 2794
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2795
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2796
		},
E
Eric Dumazet 已提交
2797
		[NEIGH_VAR_GC_THRESH2] = {
L
Linus Torvalds 已提交
2798 2799 2800
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2801
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2802
		},
E
Eric Dumazet 已提交
2803
		[NEIGH_VAR_GC_THRESH3] = {
L
Linus Torvalds 已提交
2804 2805 2806
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2807
			.proc_handler	= proc_dointvec,
L
Linus Torvalds 已提交
2808
		},
2809
		{},
L
Linus Torvalds 已提交
2810 2811 2812 2813
	},
};

/*
 * Create the sysctl directory for one neighbour parameter set.
 *
 * The directory path is net/<p_name>/neigh/<name>, where <name> is
 * dev->name, or "default" when @dev is NULL.  A private copy of
 * neigh_sysctl_template is wired to the fields of @p.
 *
 * With a device the table is cut off before the GC entries.  Without
 * one, the GC entries point at the four ints located directly behind *@p
 * (NOTE(review): this relies on the caller passing a parms structure
 * that is followed in memory by gc_interval and gc_thresh1..3; confirm
 * against the struct neigh_table layout).
 *
 * If @handler is given it replaces the handler of the retransmit-time
 * and base-reachable-time entries (jiffies and ms variants), which then
 * also receive @dev in extra1.
 *
 * On success the table is stored in p->sysctl_table.
 * Returns 0 on success and -ENOBUFS on any failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	/* Entries whose handler the caller may override. */
	static const int overridden[] = {
		NEIGH_VAR_RETRANS_TIME,		/* RetransTime */
		NEIGH_VAR_BASE_REACHABLE_TIME,	/* ReachableTime */
		NEIGH_VAR_RETRANS_TIME_MS,	/* RetransTime (in milliseconds) */
		NEIGH_VAR_BASE_REACHABLE_TIME_MS, /* ReachableTime (in milliseconds) */
	};
	struct neigh_sysctl_table *t;
	struct ctl_table *vars;
	const char *dev_name_source = NULL;
	unsigned int i;

#define NEIGH_CTL_PATH_ROOT	0
#define NEIGH_CTL_PATH_PROTO	1
#define NEIGH_CTL_PATH_NEIGH	2
#define NEIGH_CTL_PATH_DEV	3

	struct ctl_path neigh_path[] = {
		[NEIGH_CTL_PATH_ROOT]  = { .procname = "net",	  },
		[NEIGH_CTL_PATH_PROTO] = { .procname = "proto",	  },
		[NEIGH_CTL_PATH_NEIGH] = { .procname = "neigh",	  },
		[NEIGH_CTL_PATH_DEV]   = { .procname = "default", },
		{ },
	};

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		return -ENOBUFS;
	vars = t->neigh_vars;

	vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes;
	vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes;
	vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes;
	vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time;
	vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time;
	vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time;
	vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime;
	/* Both queue-length files are views of the same byte limit. */
	vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes;
	vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes;
	vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen;
	vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay;
	vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time;
	vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		int *gc = (int *)(p + 1);

		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
		vars[NEIGH_VAR_GC_INTERVAL].data = gc;
		vars[NEIGH_VAR_GC_THRESH1].data = gc + 1;
		vars[NEIGH_VAR_GC_THRESH2].data = gc + 2;
		vars[NEIGH_VAR_GC_THRESH3].data = gc + 3;
	}

	if (handler) {
		for (i = 0; i < ARRAY_SIZE(overridden); i++) {
			vars[overridden[i]].proc_handler = handler;
			vars[overridden[i]].extra1 = dev;
		}
	}

	/* The table keeps its own copy of the directory name. */
	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
	if (!t->dev_name)
		goto free_table;

	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;

	t->sysctl_header = register_net_sysctl_table(neigh_parms_net(p),
						     neigh_path, vars);
	if (!t->sysctl_header)
		goto free_name;

	p->sysctl_table = t;
	return 0;

free_name:
	kfree(t->dev_name);
free_table:
	kfree(t);
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
L
Linus Torvalds 已提交
2904 2905 2906 2907 2908 2909 2910

/*
 * Undo neigh_sysctl_register() for @p: detach the table from @p,
 * unregister it and free it together with its directory name.
 * Does nothing when @p has no sysctl table.
 */
void neigh_sysctl_unregister(struct neigh_parms *p)
{
	struct neigh_sysctl_table *t = p->sysctl_table;

	if (!t)
		return;

	p->sysctl_table = NULL;
	unregister_sysctl_table(t->sysctl_header);
	kfree(t->dev_name);
	kfree(t);
}
EXPORT_SYMBOL(neigh_sysctl_unregister);
L
Linus Torvalds 已提交
2916 2917 2918

#endif	/* CONFIG_SYSCTL */

2919 2920
/*
 * Register the rtnetlink handlers of the neighbour subsystem for
 * PF_UNSPEC.  Always returns 0.
 */
static int __init neigh_init(void)
{
	/* Neighbour entries: add, delete, dump. */
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	/* Neighbour tables: dump, set. */
	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);