neighbour.c 75.5 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

J
Joe Perches 已提交
18 19
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

20
#include <linux/slab.h>
L
Linus Torvalds 已提交
21 22 23 24 25 26 27 28 29 30
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
31
#include <net/net_namespace.h>
L
Linus Torvalds 已提交
32 33 34
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
35
#include <net/netevent.h>
36
#include <net/netlink.h>
L
Linus Torvalds 已提交
37 38
#include <linux/rtnetlink.h>
#include <linux/random.h>
39
#include <linux/string.h>
40
#include <linux/log2.h>
41
#include <linux/inetdevice.h>
42
#include <net/addrconf.h>
L
Linus Torvalds 已提交
43

44
#define DEBUG
L
Linus Torvalds 已提交
45
#define NEIGH_DEBUG 1
46 47 48 49 50
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)
L
Linus Torvalds 已提交
51 52 53 54

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
T
Thomas Graf 已提交
55 56
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
L
Linus Torvalds 已提交
57 58 59
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

static struct neigh_table *neigh_tables;
60
#ifdef CONFIG_PROC_FS
61
static const struct file_operations neigh_stat_seq_fops;
62
#endif
L
Linus Torvalds 已提交
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send something to the network.
     Doing so will result in deadlocks if a backend/driver wants to use
     the neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with a reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is also used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure which we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */

static DEFINE_RWLOCK(neigh_tbl_lock);

97
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
98 99 100 101 102
{
	kfree_skb(skb);
	return -ENETDOWN;
}

103 104 105 106 107
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

T
Thomas Graf 已提交
108
	__neigh_notify(neigh, RTM_DELNEIGH, 0);
109 110 111
	neigh_release(neigh);
}

L
Linus Torvalds 已提交
112 113 114 115 116 117 118 119
/*
 * Pick a random reachable time in the interval (1/2)*base...(3/2)*base.
 * This corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 *
 * Returns 0 when @base is 0 (which also avoids a modulo by zero).
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	/* net_random() % base lies in [0, base - 1]; shift it up by base/2 */
	return (net_random() % base) + (base >> 1);
}
122
EXPORT_SYMBOL(neigh_rand_reach_time);
L
Linus Torvalds 已提交
123 124 125 126 127 128


static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
129
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
130 131 132 133

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
134 135
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
136
	for (i = 0; i < (1 << nht->hash_shift); i++) {
137 138
		struct neighbour *n;
		struct neighbour __rcu **np;
L
Linus Torvalds 已提交
139

140
		np = &nht->hash_buckets[i];
141 142
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
L
Linus Torvalds 已提交
143 144 145 146 147 148 149
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
150 151 152
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
L
Linus Torvalds 已提交
153 154 155
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
156
				neigh_cleanup_and_release(n);
L
Linus Torvalds 已提交
157 158 159 160 161 162 163 164 165 166 167 168 169 170
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

171 172 173 174 175 176 177 178 179 180
/*
 * Arm the per-entry timer to fire at @when.
 *
 * A reference is taken on behalf of the timer before it is armed.  If
 * mod_timer() reports that the timer was already pending, the situation is
 * logged together with a stack dump.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);

	if (likely(!mod_timer(&n->timer, when)))
		return;

	printk("NEIGH: BUG, double timer add, state is %x\n",
	       n->nud_state);
	dump_stack();
}

L
Linus Torvalds 已提交
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
/*
 * Cancel the per-entry timer if the entry is in a timer-driven state.
 *
 * When a pending timer was actually deleted, the reference held for it is
 * dropped and 1 is returned; otherwise nothing changes and 0 is returned.
 */
static int neigh_del_timer(struct neighbour *n)
{
	if (!(n->nud_state & NUD_IN_TIMER))
		return 0;

	if (!del_timer(&n->timer))
		return 0;

	neigh_release(n);
	return 1;
}

/*
 * Drain @list: for every queued skb, drop the device reference held
 * through skb->dev and free the skb.
 */
static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	for (skb = skb_dequeue(list); skb != NULL; skb = skb_dequeue(list)) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

201
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
L
Linus Torvalds 已提交
202 203
{
	int i;
204
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
205

206 207 208
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

209
	for (i = 0; i < (1 << nht->hash_shift); i++) {
210 211
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];
L
Linus Torvalds 已提交
212

213 214
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
L
Linus Torvalds 已提交
215 216 217 218
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
219 220 221
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
L
Linus Torvalds 已提交
222 223 224 225 226 227 228 229 230 231 232 233 234 235
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
236
				__skb_queue_purge(&n->arp_queue);
E
Eric Dumazet 已提交
237
				n->arp_queue_len_bytes = 0;
L
Linus Torvalds 已提交
238 239 240 241 242
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
243
				neigh_dbg(2, "neigh %p is stray\n", n);
L
Linus Torvalds 已提交
244 245
			}
			write_unlock(&n->lock);
246
			neigh_cleanup_and_release(n);
L
Linus Torvalds 已提交
247 248
		}
	}
249
}
L
Linus Torvalds 已提交
250

251 252 253 254 255 256
/*
 * Flush all neighbour entries of @tbl that belong to @dev.
 * Takes tbl->lock for writing (BH disabled) around neigh_flush_dev().
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
257
EXPORT_SYMBOL(neigh_changeaddr);
258 259 260 261 262

/*
 * Drop everything @tbl knows about @dev: the regular neighbour entries
 * and the proxy entries are removed under tbl->lock, then the proxy timer
 * is stopped and the proxy queue is purged.
 *
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	/* table contents are only touched with the table lock held */
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	/* done outside the lock: del_timer_sync() waits for the handler */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);

	return 0;
}
270
EXPORT_SYMBOL(neigh_ifdown);
L
Linus Torvalds 已提交
271

272
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
L
Linus Torvalds 已提交
273 274 275 276 277 278 279 280 281 282 283 284 285 286
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

287
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
L
Linus Torvalds 已提交
288 289 290
	if (!n)
		goto out_entries;

291
	__skb_queue_head_init(&n->arp_queue);
L
Linus Torvalds 已提交
292
	rwlock_init(&n->lock);
293
	seqlock_init(&n->ha_lock);
L
Linus Torvalds 已提交
294 295 296
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
297
	seqlock_init(&n->hh.hh_lock);
L
Linus Torvalds 已提交
298
	n->parms	  = neigh_parms_clone(&tbl->parms);
299
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
L
Linus Torvalds 已提交
300 301 302 303 304 305 306 307 308 309 310 311 312

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

313 314 315 316 317 318
/*
 * Store a random 32-bit value with its lowest bit forced to 1 in *@x.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	u32 seed;

	get_random_bytes(&seed, sizeof(seed));
	*x = seed | 1;
}

319
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
L
Linus Torvalds 已提交
320
{
321
	size_t size = (1 << shift) * sizeof(struct neighbour *);
322
	struct neigh_hash_table *ret;
E
Eric Dumazet 已提交
323
	struct neighbour __rcu **buckets;
324
	int i;
L
Linus Torvalds 已提交
325

326 327 328 329 330 331
	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
E
Eric Dumazet 已提交
332
		buckets = (struct neighbour __rcu **)
333 334 335 336 337
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
L
Linus Torvalds 已提交
338
	}
E
Eric Dumazet 已提交
339
	ret->hash_buckets = buckets;
340
	ret->hash_shift = shift;
341 342
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
L
Linus Torvalds 已提交
343 344 345
	return ret;
}

346
static void neigh_hash_free_rcu(struct rcu_head *head)
L
Linus Torvalds 已提交
347
{
348 349 350
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
351
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
E
Eric Dumazet 已提交
352
	struct neighbour __rcu **buckets = nht->hash_buckets;
L
Linus Torvalds 已提交
353 354

	if (size <= PAGE_SIZE)
355
		kfree(buckets);
L
Linus Torvalds 已提交
356
	else
357 358
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
L
Linus Torvalds 已提交
359 360
}

361
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
362
						unsigned long new_shift)
L
Linus Torvalds 已提交
363
{
364 365
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;
L
Linus Torvalds 已提交
366 367 368

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

369 370
	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
371
	new_nht = neigh_hash_alloc(new_shift);
372 373
	if (!new_nht)
		return old_nht;
L
Linus Torvalds 已提交
374

375
	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
L
Linus Torvalds 已提交
376 377
		struct neighbour *n, *next;

378 379
		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
380 381 382 383
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);
L
Linus Torvalds 已提交
384

385
			hash >>= (32 - new_nht->hash_shift);
386 387 388 389 390 391 392 393
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
L
Linus Torvalds 已提交
394 395 396
		}
	}

397 398 399
	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
L
Linus Torvalds 已提交
400 401 402 403 404 405 406
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
407
	u32 hash_val;
408
	struct neigh_hash_table *nht;
409

L
Linus Torvalds 已提交
410 411
	NEIGH_CACHE_STAT_INC(tbl, lookups);

412 413
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
414
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
415 416 417 418

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
L
Linus Torvalds 已提交
419
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
420 421
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
L
Linus Torvalds 已提交
422 423 424 425
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}
426

427
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
428 429
	return n;
}
430
EXPORT_SYMBOL(neigh_lookup);
L
Linus Torvalds 已提交
431

432 433
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
L
Linus Torvalds 已提交
434 435 436
{
	struct neighbour *n;
	int key_len = tbl->key_len;
437
	u32 hash_val;
438
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
439 440 441

	NEIGH_CACHE_STAT_INC(tbl, lookups);

442 443
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
444
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
445 446 447 448

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
449
		if (!memcmp(n->primary_key, pkey, key_len) &&
450
		    net_eq(dev_net(n->dev), net)) {
451 452
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
L
Linus Torvalds 已提交
453 454 455 456
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}
457

458
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
459 460
	return n;
}
461
EXPORT_SYMBOL(neigh_lookup_nodev);
L
Linus Torvalds 已提交
462

463 464
/*
 * Create the entry for (@pkey, @dev) in @tbl, or return the one that is
 * already hashed.
 *
 * A new entry is allocated and run through the protocol constructor, the
 * device's ndo_neigh_construct and the parms' neigh_setup hook (each
 * optional).  Under tbl->lock the hash table is grown if the number of
 * entries exceeds the number of buckets, and the bucket is searched for a
 * duplicate; if one is found the new entry is released and the existing
 * one is returned.
 *
 * @want_ref: take an extra reference on the returned entry for the caller.
 *
 * Returns the entry, or an ERR_PTR(): -ENOBUFS when allocation fails, the
 * negative error of a failing hook, or -EINVAL when the entry's parms are
 * marked dead.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *other, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n)
		return ERR_PTR(-ENOBUFS);

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor) {
		error = tbl->constructor(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup) {
		error = n->parms->neigh_setup(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* more entries than buckets: double the table */
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* somebody may have inserted the same key in the meantime */
	for (other = rcu_dereference_protected(nht->hash_buckets[hash_val],
					       lockdep_is_held(&tbl->lock));
	     other != NULL;
	     other = rcu_dereference_protected(other->next,
					       lockdep_is_held(&tbl->lock))) {
		if (dev != other->dev ||
		    memcmp(other->primary_key, pkey, key_len))
			continue;

		if (want_ref)
			neigh_hold(other);
		rc = other;
		goto out_tbl_unlock;
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);

	/* link the new entry at the head of its bucket */
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	return n;

out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	return rc;
}
549
EXPORT_SYMBOL(__neigh_create);
L
Linus Torvalds 已提交
550

551
static u32 pneigh_hash(const void *pkey, int key_len)
552 553 554 555 556 557
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
558 559
	return hash_val;
}
560

561 562 563 564 565 566 567
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
568
		if (!memcmp(n->key, pkey, key_len) &&
569
		    net_eq(pneigh_net(n), net) &&
570
		    (n->dev == dev || !n->dev))
571 572
			return n;
		n = n->next;
573
	}
574 575
	return NULL;
}
576

577 578 579 580 581 582 583 584
/*
 * Look up a proxy entry without taking tbl->lock; this function itself
 * does no locking.
 *
 * Returns the matching entry, or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 bucket = pneigh_hash(pkey, key_len);
	struct pneigh_entry *head = tbl->phash_buckets[bucket];

	return __pneigh_lookup_1(head, net, pkey, key_len, dev);
}
586
EXPORT_SYMBOL_GPL(__pneigh_lookup);
587

588 589
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
L
Linus Torvalds 已提交
590 591 592 593
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
594
	u32 hash_val = pneigh_hash(pkey, key_len);
L
Linus Torvalds 已提交
595 596

	read_lock_bh(&tbl->lock);
597 598
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
L
Linus Torvalds 已提交
599
	read_unlock_bh(&tbl->lock);
600 601

	if (n || !creat)
L
Linus Torvalds 已提交
602 603
		goto out;

604 605
	ASSERT_RTNL();

L
Linus Torvalds 已提交
606 607 608 609
	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

E
Eric Dumazet 已提交
610
	write_pnet(&n->net, hold_net(net));
L
Linus Torvalds 已提交
611 612 613 614 615 616 617 618
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
619
		release_net(net);
L
Linus Torvalds 已提交
620 621 622 623 624 625 626 627 628 629 630 631
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
632
EXPORT_SYMBOL(pneigh_lookup);
L
Linus Torvalds 已提交
633 634


635
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
L
Linus Torvalds 已提交
636 637 638 639
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
640
	u32 hash_val = pneigh_hash(pkey, key_len);
L
Linus Torvalds 已提交
641 642 643 644

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
645
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
646
		    net_eq(pneigh_net(n), net)) {
L
Linus Torvalds 已提交
647 648 649 650 651 652
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
653
			release_net(pneigh_net(n));
L
Linus Torvalds 已提交
654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

/*
 * Remove every proxy entry bound to @dev, or every proxy entry when @dev
 * is NULL.  No locking is done here; neigh_ifdown() calls this with
 * tbl->lock held for writing.
 *
 * For each removed entry the optional tbl->pdestructor is run, the device
 * and namespace references are dropped and the entry is freed.
 *
 * NOTE(review): the return value is -ENOENT unconditionally, even when
 * entries were removed; neigh_ifdown() ignores it.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (dev && n->dev != dev) {
				/* belongs to another device, keep it */
				np = &n->next;
				continue;
			}

			*np = n->next;
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
		}
	}

	return -ENOENT;
}

686 687 688 689 690 691 692
static void neigh_parms_destroy(struct neigh_parms *parms);

/*
 * Drop one reference on @parms and destroy it when that was the last one.
 */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (!atomic_dec_and_test(&parms->refcnt))
		return;

	neigh_parms_destroy(parms);
}
L
Linus Torvalds 已提交
693 694 695 696 697 698 699

/*
 *	neighbour must already be out of the table;
 *
 */
void neigh_destroy(struct neighbour *neigh)
{
700 701
	struct net_device *dev = neigh->dev;

L
Linus Torvalds 已提交
702 703 704
	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
J
Joe Perches 已提交
705
		pr_warn("Destroying alive neighbour %p\n", neigh);
L
Linus Torvalds 已提交
706 707 708 709 710
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
J
Joe Perches 已提交
711
		pr_warn("Impossible event\n");
L
Linus Torvalds 已提交
712

713 714 715
	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
E
Eric Dumazet 已提交
716
	neigh->arp_queue_len_bytes = 0;
L
Linus Torvalds 已提交
717

718 719 720
	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

721
	dev_put(dev);
L
Linus Torvalds 已提交
722 723
	neigh_parms_put(neigh->parms);

724
	neigh_dbg(2, "neigh %p is destroyed\n", neigh);
L
Linus Torvalds 已提交
725 726

	atomic_dec(&neigh->tbl->entries);
727
	kfree_rcu(neigh, rcu);
L
Linus Torvalds 已提交
728
}
729
EXPORT_SYMBOL(neigh_destroy);
L
Linus Torvalds 已提交
730 731 732 733 734 735 736 737

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
738
	neigh_dbg(2, "neigh %p is suspected\n", neigh);
L
Linus Torvalds 已提交
739 740 741 742 743 744 745 746 747 748 749

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
750
	neigh_dbg(2, "neigh %p is connected\n", neigh);
L
Linus Torvalds 已提交
751 752 753 754

	neigh->output = neigh->ops->connected_output;
}

755
static void neigh_periodic_work(struct work_struct *work)
L
Linus Torvalds 已提交
756
{
757
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
758 759
	struct neighbour *n;
	struct neighbour __rcu **np;
760
	unsigned int i;
761
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
762 763 764

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

765
	write_lock_bh(&tbl->lock);
766 767
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
L
Linus Torvalds 已提交
768

769 770 771
	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

L
Linus Torvalds 已提交
772 773 774 775
	/*
	 *	periodically recompute ReachableTime from random function
	 */

776
	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
L
Linus Torvalds 已提交
777
		struct neigh_parms *p;
778
		tbl->last_rand = jiffies;
L
Linus Torvalds 已提交
779 780
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
J
Jiri Pirko 已提交
781
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
L
Linus Torvalds 已提交
782 783
	}

784
	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
785
		np = &nht->hash_buckets[i];
L
Linus Torvalds 已提交
786

787 788
		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
789
			unsigned int state;
L
Linus Torvalds 已提交
790

791
			write_lock(&n->lock);
L
Linus Torvalds 已提交
792

793 794 795 796 797
			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}
L
Linus Torvalds 已提交
798

799 800
			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;
L
Linus Torvalds 已提交
801

802 803
			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
J
Jiri Pirko 已提交
804
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
805 806 807 808 809 810
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
L
Linus Torvalds 已提交
811 812 813
			write_unlock(&n->lock);

next_elt:
814 815 816 817 818 819 820 821 822
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
823 824
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
L
Linus Torvalds 已提交
825
	}
826
out:
J
Jiri Pirko 已提交
827 828 829
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
L
Linus Torvalds 已提交
830
	 */
831
	schedule_delayed_work(&tbl->gc_work,
J
Jiri Pirko 已提交
832
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
833
	write_unlock_bh(&tbl->lock);
L
Linus Torvalds 已提交
834 835 836 837 838
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
E
Eric Dumazet 已提交
839
	return (n->nud_state & NUD_PROBE) ?
J
Jiri Pirko 已提交
840 841 842
		NEIGH_VAR(p, UCAST_PROBES) :
		NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
		NEIGH_VAR(p, MCAST_PROBES);
L
Linus Torvalds 已提交
843 844
}

845
static void neigh_invalidate(struct neighbour *neigh)
E
Eric Dumazet 已提交
846 847
	__releases(neigh->lock)
	__acquires(neigh->lock)
848 849 850 851
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
852
	neigh_dbg(2, "neigh %p is failed\n", neigh);
853 854 855 856 857 858 859 860 861 862 863 864 865
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
866
	__skb_queue_purge(&neigh->arp_queue);
E
Eric Dumazet 已提交
867
	neigh->arp_queue_len_bytes = 0;
868 869
}

E
Eric Dumazet 已提交
870 871 872
/*
 * Send one solicitation for @neigh.
 *
 * Called with neigh->lock held for writing; the lock is released here.
 * The most recently queued packet, if any, is copied while the lock is
 * still held and handed to ops->solicit(); the copy is freed afterwards
 * and neigh->probes is incremented.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *tail = skb_peek_tail(&neigh->arp_queue);
	struct sk_buff *skb = NULL;

	/* keep skb alive even if arp_queue overflows */
	if (tail)
		skb = skb_copy(tail, GFP_ATOMIC);

	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}

L
Linus Torvalds 已提交
883 884 885 886 887 888
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
889
	unsigned int state;
L
Linus Torvalds 已提交
890 891 892 893 894 895 896 897
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

898
	if (!(state & NUD_IN_TIMER))
L
Linus Torvalds 已提交
899 900 901
		goto out;

	if (state & NUD_REACHABLE) {
902
		if (time_before_eq(now,
L
Linus Torvalds 已提交
903
				   neigh->confirmed + neigh->parms->reachable_time)) {
904
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
L
Linus Torvalds 已提交
905 906
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
J
Jiri Pirko 已提交
907 908
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
909
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
L
Linus Torvalds 已提交
910
			neigh->nud_state = NUD_DELAY;
911
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
912
			neigh_suspect(neigh);
J
Jiri Pirko 已提交
913
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
L
Linus Torvalds 已提交
914
		} else {
915
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
L
Linus Torvalds 已提交
916
			neigh->nud_state = NUD_STALE;
917
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
918
			neigh_suspect(neigh);
919
			notify = 1;
L
Linus Torvalds 已提交
920 921
		}
	} else if (state & NUD_DELAY) {
922
		if (time_before_eq(now,
J
Jiri Pirko 已提交
923 924
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
925
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
L
Linus Torvalds 已提交
926
			neigh->nud_state = NUD_REACHABLE;
927
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
928
			neigh_connect(neigh);
929
			notify = 1;
L
Linus Torvalds 已提交
930 931
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
932
			neigh_dbg(2, "neigh %p is probed\n", neigh);
L
Linus Torvalds 已提交
933
			neigh->nud_state = NUD_PROBE;
934
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
935
			atomic_set(&neigh->probes, 0);
J
Jiri Pirko 已提交
936
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
L
Linus Torvalds 已提交
937 938 939
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
J
Jiri Pirko 已提交
940
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
L
Linus Torvalds 已提交
941 942 943 944 945 946
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
947
		neigh_invalidate(neigh);
L
Linus Torvalds 已提交
948 949 950 951 952
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
953 954
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
L
Linus Torvalds 已提交
955 956
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
E
Eric Dumazet 已提交
957
		neigh_probe(neigh);
958
	} else {
959
out:
960 961
		write_unlock(&neigh->lock);
	}
T
Thomas Graf 已提交
962

963
	if (notify)
T
Thomas Graf 已提交
964
		neigh_update_notify(neigh);
L
Linus Torvalds 已提交
965 966 967 968 969 970 971

	neigh_release(neigh);
}

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
E
Eric Dumazet 已提交
972
	bool immediate_probe = false;
L
Linus Torvalds 已提交
973 974 975 976 977 978 979 980

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
J
Jiri Pirko 已提交
981 982
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
E
Eric Dumazet 已提交
983 984
			unsigned long next, now = jiffies;

J
Jiri Pirko 已提交
985 986
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
L
Linus Torvalds 已提交
987
			neigh->nud_state     = NUD_INCOMPLETE;
E
Eric Dumazet 已提交
988
			neigh->updated = now;
J
Jiri Pirko 已提交
989 990
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
E
Eric Dumazet 已提交
991 992
			neigh_add_timer(neigh, next);
			immediate_probe = true;
L
Linus Torvalds 已提交
993 994
		} else {
			neigh->nud_state = NUD_FAILED;
995
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
996 997
			write_unlock_bh(&neigh->lock);

998
			kfree_skb(skb);
L
Linus Torvalds 已提交
999 1000 1001
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
1002
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
L
Linus Torvalds 已提交
1003
		neigh->nud_state = NUD_DELAY;
1004
		neigh->updated = jiffies;
J
Jiri Pirko 已提交
1005 1006
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
L
Linus Torvalds 已提交
1007 1008 1009 1010
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
E
Eric Dumazet 已提交
1011
			while (neigh->arp_queue_len_bytes + skb->truesize >
J
Jiri Pirko 已提交
1012
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
L
Linus Torvalds 已提交
1013
				struct sk_buff *buff;
E
Eric Dumazet 已提交
1014

1015
				buff = __skb_dequeue(&neigh->arp_queue);
E
Eric Dumazet 已提交
1016 1017 1018
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
L
Linus Torvalds 已提交
1019
				kfree_skb(buff);
1020
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
L
Linus Torvalds 已提交
1021
			}
E
Eric Dumazet 已提交
1022
			skb_dst_force(skb);
L
Linus Torvalds 已提交
1023
			__skb_queue_tail(&neigh->arp_queue, skb);
E
Eric Dumazet 已提交
1024
			neigh->arp_queue_len_bytes += skb->truesize;
L
Linus Torvalds 已提交
1025 1026 1027 1028
		}
		rc = 1;
	}
out_unlock_bh:
E
Eric Dumazet 已提交
1029 1030 1031 1032 1033
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
L
Linus Torvalds 已提交
1034 1035
	return rc;
}
1036
EXPORT_SYMBOL(__neigh_event_send);
L
Linus Torvalds 已提交
1037

1038
static void neigh_update_hhs(struct neighbour *neigh)
L
Linus Torvalds 已提交
1039 1040
{
	struct hh_cache *hh;
1041
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
D
Doug Kehn 已提交
1042 1043 1044 1045
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;
L
Linus Torvalds 已提交
1046 1047

	if (update) {
1048 1049
		hh = &neigh->hh;
		if (hh->hh_len) {
1050
			write_seqlock_bh(&hh->hh_lock);
L
Linus Torvalds 已提交
1051
			update(hh, neigh->dev, neigh->ha);
1052
			write_sequnlock_bh(&hh->hh_lock);
L
Linus Torvalds 已提交
1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065
		}
	}
}



/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows to retain current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Returns 0 on success (including the case where a differing lladdr is
   silently ignored because no override flag was given), -EPERM for a
   non-administrative change of a NUD_NOARP/NUD_PERMANENT entry, and
   -EINVAL when no lladdr is supplied, the device needs one and the
   entry holds no valid address.

   Caller MUST hold reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	/* Transition to a non-valid state: no address handling needed. */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* From here on "lladdr == neigh->ha" means "address unchanged". */

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new address under the ha_lock seqlock so that
		 * lockless readers (see the output paths) can retry.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		/* The entry just became valid: flush packets that were
		 * queued while it was unresolved.  The lock is dropped
		 * around each output call.
		 */
		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	/* Notification is sent after the entry lock has been released. */
	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
L
Linus Torvalds 已提交
1240 1241 1242 1243 1244 1245 1246 1247

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
1248
		neigh_update(neigh, lladdr, NUD_STALE,
L
Linus Torvalds 已提交
1249 1250 1251
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}
1252
EXPORT_SYMBOL(neigh_event_ns);
L
Linus Torvalds 已提交
1253

E
Eric Dumazet 已提交
1254
/* called with read_lock_bh(&n->lock); */
1255
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
L
Linus Torvalds 已提交
1256 1257
{
	struct net_device *dev = dst->dev;
1258 1259
	__be16 prot = dst->ops->protocol;
	struct hh_cache	*hh = &n->hh;
1260 1261

	write_lock_bh(&n->lock);
E
Eric Dumazet 已提交
1262

1263 1264 1265
	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
1266 1267
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);
E
Eric Dumazet 已提交
1268

1269
	write_unlock_bh(&n->lock);
L
Linus Torvalds 已提交
1270 1271 1272
}

/* This function can be used in contexts, where only old dev_queue_xmit
1273 1274
 * worked, f.e. if you want to override normal output path (eql, shaper),
 * but resolution is not made yet.
L
Linus Torvalds 已提交
1275 1276
 */

1277
int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
1278 1279 1280
{
	struct net_device *dev = skb->dev;

1281
	__skb_pull(skb, skb_network_offset(skb));
L
Linus Torvalds 已提交
1282

1283 1284
	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
			    skb->len) < 0 &&
1285
	    dev->header_ops->rebuild(skb))
L
Linus Torvalds 已提交
1286 1287 1288 1289
		return 0;

	return dev_queue_xmit(skb);
}
1290
EXPORT_SYMBOL(neigh_compat_output);
L
Linus Torvalds 已提交
1291 1292 1293

/* Slow and careful. */

1294
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
1295
{
E
Eric Dumazet 已提交
1296
	struct dst_entry *dst = skb_dst(skb);
L
Linus Torvalds 已提交
1297 1298
	int rc = 0;

1299
	if (!dst)
L
Linus Torvalds 已提交
1300 1301 1302 1303 1304
		goto discard;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
1305
		unsigned int seq;
E
Eric Dumazet 已提交
1306

1307 1308
		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);
E
Eric Dumazet 已提交
1309

1310
		do {
1311
			__skb_pull(skb, skb_network_offset(skb));
1312 1313 1314 1315
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));
E
Eric Dumazet 已提交
1316

L
Linus Torvalds 已提交
1317
		if (err >= 0)
1318
			rc = dev_queue_xmit(skb);
L
Linus Torvalds 已提交
1319 1320 1321 1322 1323 1324
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
1325
	neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
L
Linus Torvalds 已提交
1326 1327 1328 1329 1330
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
1331
EXPORT_SYMBOL(neigh_resolve_output);
L
Linus Torvalds 已提交
1332 1333 1334

/* As fast as possible without hh cache */

1335
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
1336 1337
{
	struct net_device *dev = neigh->dev;
1338
	unsigned int seq;
1339
	int err;
L
Linus Torvalds 已提交
1340

1341
	do {
1342
		__skb_pull(skb, skb_network_offset(skb));
1343 1344 1345 1346 1347
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

L
Linus Torvalds 已提交
1348
	if (err >= 0)
1349
		err = dev_queue_xmit(skb);
L
Linus Torvalds 已提交
1350 1351 1352 1353 1354 1355
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
1356
EXPORT_SYMBOL(neigh_connected_output);

/* Output path that hands the skb straight to dev_queue_xmit() without
 * touching it; @neigh is unused.  Returns dev_queue_xmit()'s result.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

L
Linus Torvalds 已提交
1364 1365 1366 1367 1368
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
1369
	struct sk_buff *skb, *n;
L
Linus Torvalds 已提交
1370 1371 1372

	spin_lock(&tbl->proxy_queue.lock);

1373 1374
	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;
L
Linus Torvalds 已提交
1375 1376

		if (tdif <= 0) {
1377
			struct net_device *dev = skb->dev;
1378

1379
			__skb_unlink(skb, &tbl->proxy_queue);
1380 1381
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
1382
				tbl->proxy_redo(skb);
1383 1384
				rcu_read_unlock();
			} else {
1385
				kfree_skb(skb);
1386
			}
L
Linus Torvalds 已提交
1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
J
Jiri Pirko 已提交
1402 1403
	unsigned long sched_next = now + (net_random() %
					  NEIGH_VAR(p, PROXY_DELAY));
L
Linus Torvalds 已提交
1404

J
Jiri Pirko 已提交
1405
	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
L
Linus Torvalds 已提交
1406 1407 1408
		kfree_skb(skb);
		return;
	}
1409 1410 1411

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
L
Linus Torvalds 已提交
1412 1413 1414 1415 1416 1417

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
E
Eric Dumazet 已提交
1418
	skb_dst_drop(skb);
L
Linus Torvalds 已提交
1419 1420 1421 1422 1423
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1424
EXPORT_SYMBOL(pneigh_enqueue);
L
Linus Torvalds 已提交
1425

1426
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1427 1428 1429 1430 1431
						      struct net *net, int ifindex)
{
	struct neigh_parms *p;

	for (p = &tbl->parms; p; p = p->next) {
1432
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1433
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
1434 1435 1436 1437 1438
			return p;
	}

	return NULL;
}
L
Linus Torvalds 已提交
1439 1440 1441 1442

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
1443
	struct neigh_parms *p;
1444 1445
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;
1446

1447
	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
L
Linus Torvalds 已提交
1448 1449 1450 1451
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
J
Jiri Pirko 已提交
1452
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1453 1454 1455 1456
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;
1457

1458
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1459 1460
			release_net(net);
			dev_put(dev);
1461 1462
			kfree(p);
			return NULL;
L
Linus Torvalds 已提交
1463
		}
1464

L
Linus Torvalds 已提交
1465 1466 1467 1468
		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
1469 1470

		neigh_parms_data_state_cleanall(p);
L
Linus Torvalds 已提交
1471 1472 1473
	}
	return p;
}
1474
EXPORT_SYMBOL(neigh_parms_alloc);
L
Linus Torvalds 已提交
1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
1496 1497
			if (parms->dev)
				dev_put(parms->dev);
L
Linus Torvalds 已提交
1498 1499 1500 1501 1502
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
1503
	neigh_dbg(1, "%s: not found\n", __func__);
L
Linus Torvalds 已提交
1504
}
1505
EXPORT_SYMBOL(neigh_parms_release);

/* Final teardown of a parameter set: releases the network-namespace
 * reference (taken with hold_net() in neigh_parms_alloc()) and frees the
 * memory.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}

1513 1514
static struct lock_class_key neigh_table_proxy_queue_class;

1515
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
L
Linus Torvalds 已提交
1516 1517 1518 1519
{
	unsigned long now = jiffies;
	unsigned long phsize;

E
Eric Dumazet 已提交
1520
	write_pnet(&tbl->parms.net, &init_net);
L
Linus Torvalds 已提交
1521 1522
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
J
Jiri Pirko 已提交
1523
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
L
Linus Torvalds 已提交
1524 1525 1526 1527

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");
1528

L
Linus Torvalds 已提交
1529
#ifdef CONFIG_PROC_FS
1530 1531
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
L
Linus Torvalds 已提交
1532 1533 1534
		panic("cannot create neighbour proc dir entry");
#endif

1535
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
L
Linus Torvalds 已提交
1536 1537

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
A
Andrew Morton 已提交
1538
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
L
Linus Torvalds 已提交
1539

1540
	if (!tbl->nht || !tbl->phash_buckets)
L
Linus Torvalds 已提交
1541 1542
		panic("cannot allocate neighbour cache hashes");

1543 1544 1545 1546 1547 1548
	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

L
Linus Torvalds 已提交
1549
	rwlock_init(&tbl->lock);
1550
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1551
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1552
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1553 1554
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);
L
Linus Torvalds 已提交
1555 1556 1557

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1558 1559 1560 1561 1562 1563 1564
}

void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
L
Linus Torvalds 已提交
1565
	write_lock(&neigh_tbl_lock);
1566 1567 1568 1569
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
L
Linus Torvalds 已提交
1570 1571 1572
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);
1573 1574

	if (unlikely(tmp)) {
J
Joe Perches 已提交
1575 1576
		pr_err("Registering multiple tables for family %d\n",
		       tbl->family);
1577 1578
		dump_stack();
	}
L
Linus Torvalds 已提交
1579
}
1580
EXPORT_SYMBOL(neigh_table_init);
L
Linus Torvalds 已提交
1581 1582 1583 1584 1585 1586

int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
1587
	cancel_delayed_work_sync(&tbl->gc_work);
L
Linus Torvalds 已提交
1588 1589 1590 1591
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
J
Joe Perches 已提交
1592
		pr_crit("neighbour leakage\n");
L
Linus Torvalds 已提交
1593 1594 1595 1596 1597 1598 1599 1600 1601
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

E
Eric Dumazet 已提交
1602 1603
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
1604
	tbl->nht = NULL;
L
Linus Torvalds 已提交
1605 1606 1607 1608

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

1609 1610
	remove_proc_entry(tbl->id, init_net.proc_net_stat);

1611 1612 1613
	free_percpu(tbl->stats);
	tbl->stats = NULL;

L
Linus Torvalds 已提交
1614 1615
	return 0;
}
1616
EXPORT_SYMBOL(neigh_table_clear);
L
Linus Torvalds 已提交
1617

1618
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
L
Linus Torvalds 已提交
1619
{
1620
	struct net *net = sock_net(skb->sk);
1621 1622
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
L
Linus Torvalds 已提交
1623 1624
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
1625
	int err = -EINVAL;
L
Linus Torvalds 已提交
1626

1627
	ASSERT_RTNL();
1628
	if (nlmsg_len(nlh) < sizeof(*ndm))
L
Linus Torvalds 已提交
1629 1630
		goto out;

1631 1632 1633 1634 1635 1636
	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
1637
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1638 1639 1640 1641 1642 1643
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

L
Linus Torvalds 已提交
1644 1645
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1646
		struct neighbour *neigh;
L
Linus Torvalds 已提交
1647 1648 1649 1650 1651

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

1652
		if (nla_len(dst_attr) < tbl->key_len)
1653
			goto out;
L
Linus Torvalds 已提交
1654 1655

		if (ndm->ndm_flags & NTF_PROXY) {
1656
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1657
			goto out;
L
Linus Torvalds 已提交
1658 1659
		}

1660
		if (dev == NULL)
1661
			goto out;
L
Linus Torvalds 已提交
1662

1663 1664 1665
		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
1666
			goto out;
L
Linus Torvalds 已提交
1667
		}
1668 1669 1670 1671 1672

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
1673
		goto out;
L
Linus Torvalds 已提交
1674 1675
	}
	read_unlock(&neigh_tbl_lock);
1676 1677
	err = -EAFNOSUPPORT;

L
Linus Torvalds 已提交
1678 1679 1680 1681
out:
	return err;
}

1682
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
L
Linus Torvalds 已提交
1683
{
1684
	struct net *net = sock_net(skb->sk);
1685 1686
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
L
Linus Torvalds 已提交
1687 1688
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
1689
	int err;
L
Linus Torvalds 已提交
1690

1691
	ASSERT_RTNL();
1692 1693
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
L
Linus Torvalds 已提交
1694 1695
		goto out;

1696 1697 1698 1699 1700 1701
	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
1702
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1703 1704 1705 1706 1707 1708
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1709
			goto out;
1710 1711
	}

L
Linus Torvalds 已提交
1712 1713
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1714 1715 1716
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;
L
Linus Torvalds 已提交
1717 1718 1719 1720 1721

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

1722
		if (nla_len(tb[NDA_DST]) < tbl->key_len)
1723
			goto out;
1724 1725
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
L
Linus Torvalds 已提交
1726 1727

		if (ndm->ndm_flags & NTF_PROXY) {
1728 1729 1730
			struct pneigh_entry *pn;

			err = -ENOBUFS;
1731
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
1732 1733 1734 1735
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
1736
			goto out;
L
Linus Torvalds 已提交
1737 1738
		}

1739
		if (dev == NULL)
1740
			goto out;
1741 1742 1743 1744 1745

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
1746
				goto out;
1747
			}
1748

1749 1750 1751
			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
1752
				goto out;
L
Linus Torvalds 已提交
1753 1754
			}
		} else {
1755 1756 1757
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
1758
				goto out;
L
Linus Torvalds 已提交
1759 1760
			}

1761 1762 1763
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}
L
Linus Torvalds 已提交
1764

1765 1766 1767 1768 1769
		if (ndm->ndm_flags & NTF_USE) {
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1770
		neigh_release(neigh);
1771
		goto out;
L
Linus Torvalds 已提交
1772 1773 1774
	}

	read_unlock(&neigh_tbl_lock);
1775
	err = -EAFNOSUPPORT;
L
Linus Torvalds 已提交
1776 1777 1778 1779
out:
	return err;
}

1780 1781
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
1782 1783 1784 1785 1786
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;
1787

1788 1789 1790
	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
J
Jiri Pirko 已提交
1791 1792
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1793 1794
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
J
Jiri Pirko 已提交
1795 1796 1797 1798 1799 1800 1801
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
1802 1803
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
J
Jiri Pirko 已提交
1804 1805 1806
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME)) ||
1807
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
J
Jiri Pirko 已提交
1808 1809 1810 1811 1812 1813 1814 1815 1816
			  NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME)) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY)) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY)) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME)))
1817
		goto nla_put_failure;
1818
	return nla_nest_end(skb, nest);
1819

1820
nla_put_failure:
1821 1822
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
1823 1824
}

1825 1826
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
1827 1828 1829 1830
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

1831 1832
	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
1833
		return -EMSGSIZE;
1834

1835
	ndtmsg = nlmsg_data(nlh);
1836 1837 1838

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
1839 1840
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;
1841

1842 1843 1844 1845 1846 1847
	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
1848 1849 1850 1851
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
1852
		struct neigh_hash_table *nht;
1853 1854 1855 1856 1857 1858 1859 1860 1861
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

1862 1863
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
1864
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1865
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1866 1867
		rcu_read_unlock_bh();

1868 1869
		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
1870 1871 1872 1873 1874 1875 1876 1877
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

1878
		for_each_possible_cpu(cpu) {
1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

1894 1895
		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
1896 1897 1898 1899
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1900
		goto nla_put_failure;
1901 1902

	read_unlock_bh(&tbl->lock);
1903
	return nlmsg_end(skb, nlh);
1904

1905
nla_put_failure:
1906
	read_unlock_bh(&tbl->lock);
1907 1908
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
1909 1910
}

1911 1912
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
1913
				    struct neigh_parms *parms,
1914 1915
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
1916 1917 1918 1919
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

1920 1921
	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
1922
		return -EMSGSIZE;
1923

1924
	ndtmsg = nlmsg_data(nlh);
1925 1926 1927

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
1928 1929
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;
1930

1931 1932 1933
	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;
1934 1935

	read_unlock_bh(&tbl->lock);
1936 1937
	return nlmsg_end(skb, nlh);
errout:
1938
	read_unlock_bh(&tbl->lock);
1939 1940
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
1941
}
1942

1943
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1944 1945 1946 1947 1948 1949 1950 1951
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

1952
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};

1968
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1969
{
1970
	struct net *net = sock_net(skb->sk);
1971
	struct neigh_table *tbl;
1972 1973 1974
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;
1975

1976 1977 1978 1979
	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;
1980

1981 1982 1983 1984 1985 1986
	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
1987 1988 1989 1990 1991
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

1992
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1993 1994 1995 1996 1997
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
1998
		goto errout_locked;
1999 2000
	}

2001
	/*
2002 2003 2004 2005 2006
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

2007 2008
	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
2009
		struct neigh_parms *p;
2010
		int i, ifindex = 0;
2011

2012 2013 2014 2015
		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;
2016

2017 2018
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2019

2020
		p = lookup_neigh_parms(tbl, net, ifindex);
2021 2022
		if (p == NULL) {
			err = -ENOENT;
2023
			goto errout_tbl_lock;
2024 2025
		}

2026 2027 2028
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;
2029

2030 2031
			switch (i) {
			case NDTPA_QUEUE_LEN:
J
Jiri Pirko 已提交
2032 2033 2034
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
E
Eric Dumazet 已提交
2035 2036
				break;
			case NDTPA_QUEUE_LENBYTES:
J
Jiri Pirko 已提交
2037 2038
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
2039 2040
				break;
			case NDTPA_PROXY_QLEN:
J
Jiri Pirko 已提交
2041 2042
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
2043 2044
				break;
			case NDTPA_APP_PROBES:
J
Jiri Pirko 已提交
2045 2046
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
2047 2048
				break;
			case NDTPA_UCAST_PROBES:
J
Jiri Pirko 已提交
2049 2050
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
2051 2052
				break;
			case NDTPA_MCAST_PROBES:
J
Jiri Pirko 已提交
2053 2054
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
2055 2056
				break;
			case NDTPA_BASE_REACHABLE_TIME:
J
Jiri Pirko 已提交
2057 2058
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
2059 2060
				break;
			case NDTPA_GC_STALETIME:
J
Jiri Pirko 已提交
2061 2062
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
2063 2064
				break;
			case NDTPA_DELAY_PROBE_TIME:
J
Jiri Pirko 已提交
2065 2066
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
2067 2068
				break;
			case NDTPA_RETRANS_TIME:
J
Jiri Pirko 已提交
2069 2070
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
2071 2072
				break;
			case NDTPA_ANYCAST_DELAY:
J
Jiri Pirko 已提交
2073
				NEIGH_VAR_SET(p, ANYCAST_DELAY, nla_get_msecs(tbp[i]));
2074 2075
				break;
			case NDTPA_PROXY_DELAY:
J
Jiri Pirko 已提交
2076
				NEIGH_VAR_SET(p, PROXY_DELAY, nla_get_msecs(tbp[i]));
2077 2078
				break;
			case NDTPA_LOCKTIME:
J
Jiri Pirko 已提交
2079
				NEIGH_VAR_SET(p, LOCKTIME, nla_get_msecs(tbp[i]));
2080 2081 2082 2083
				break;
			}
		}
	}
2084

2085 2086 2087 2088 2089 2090
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

2091 2092
	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2093

2094 2095
	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2096

2097 2098
	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2099

2100 2101
	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2102 2103 2104

	err = 0;

2105
errout_tbl_lock:
2106
	write_unlock_bh(&tbl->lock);
2107
errout_locked:
2108
	read_unlock(&neigh_tbl_lock);
2109
errout:
2110 2111 2112
	return err;
}

2113
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2114
{
2115
	struct net *net = sock_net(skb->sk);
2116 2117 2118
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
2119 2120
	struct neigh_table *tbl;

2121
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2122 2123

	read_lock(&neigh_tbl_lock);
2124
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2125 2126
		struct neigh_parms *p;

2127
		if (tidx < tbl_skip || (family && tbl->family != family))
2128 2129
			continue;

2130
		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2131 2132
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
2133 2134
			break;

2135
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2136
			if (!net_eq(neigh_parms_net(p), net))
2137 2138
				continue;

2139 2140
			if (nidx < neigh_skip)
				goto next;
2141

2142
			if (neightbl_fill_param_info(skb, tbl, p,
2143
						     NETLINK_CB(cb->skb).portid,
2144 2145 2146
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
2147
				goto out;
2148 2149
		next:
			nidx++;
2150 2151
		}

2152
		neigh_skip = 0;
2153 2154 2155
	}
out:
	read_unlock(&neigh_tbl_lock);
2156 2157
	cb->args[0] = tidx;
	cb->args[1] = nidx;
2158 2159 2160

	return skb->len;
}
L
Linus Torvalds 已提交
2161

2162 2163
/*
 * Serialise neighbour entry @neigh into one netlink message in @skb.
 *
 * The message carries an ndmsg header followed by NDA_DST, NDA_LLADDR
 * (only when the state has a NUD_VALID bit set), NDA_PROBES and
 * NDA_CACHEINFO.  The state, link-layer address and cache timestamps are
 * sampled under neigh->lock held for reading.  The reported reference
 * count is the current count minus one.
 *
 * Returns the value of nlmsg_end() on success, or -EMSGSIZE when the
 * header cannot be placed or adding an attribute fails (the partial
 * message is cancelled).
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (!nlh)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = neigh->ops->family;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = neigh->flags;
	ndm->ndm_type = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);

	ndm->ndm_state = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0)
			goto nla_put_failure_unlock;
	}

	ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;

	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure_unlock:
	read_unlock_bh(&neigh->lock);
nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233
/*
 * Serialise proxy neighbour entry @pn of table @tbl into one netlink
 * message in @skb.
 *
 * The message consists of an ndmsg header (flags always include
 * NTF_PROXY, state is NUD_NONE) followed by an NDA_DST attribute holding
 * the proxied key.
 *
 * A proxy entry may exist without an associated device, in which case
 * pn->dev is NULL; such entries are reported with ifindex 0 instead of
 * dereferencing the NULL pointer.
 *
 * Returns the value of nlmsg_end() on success, or -EMSGSIZE when the
 * header cannot be placed or NDA_DST cannot be added (the partial message
 * is cancelled).
 */
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
	ndm->ndm_type	 = NDA_DST;
	/* device-agnostic proxy entries have no net_device attached */
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

T
Thomas Graf 已提交
2244 2245 2246 2247 2248
/*
 * Announce a change of @neigh: first to the in-kernel netevent notifier
 * chain (NETEVENT_NEIGH_UPDATE), then through __neigh_notify() as an
 * RTM_NEWNEIGH message with no flags.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
L
Linus Torvalds 已提交
2249 2250 2251 2252

static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
2253
	struct net *net = sock_net(skb->sk);
L
Linus Torvalds 已提交
2254 2255 2256
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
2257
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
2258

2259 2260 2261
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

2262
	for (h = s_h; h < (1 << nht->hash_shift); h++) {
L
Linus Torvalds 已提交
2263 2264
		if (h > s_h)
			s_idx = 0;
2265 2266 2267
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
O
Octavian Purdila 已提交
2268
			if (!net_eq(dev_net(n->dev), net))
2269
				continue;
2270 2271
			if (idx < s_idx)
				goto next;
2272
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
L
Linus Torvalds 已提交
2273
					    cb->nlh->nlmsg_seq,
2274 2275
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
L
Linus Torvalds 已提交
2276 2277 2278
				rc = -1;
				goto out;
			}
2279
next:
2280
			idx++;
L
Linus Torvalds 已提交
2281 2282 2283 2284
		}
	}
	rc = skb->len;
out:
2285
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
2286 2287 2288 2289 2290
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}

2291 2292 2293 2294 2295 2296 2297 2298 2299 2300
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

2301
	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2302 2303 2304 2305 2306 2307 2308
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (dev_net(n->dev) != net)
				continue;
			if (idx < s_idx)
				goto next;
2309
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI, tbl) <= 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}

2331
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
L
Linus Torvalds 已提交
2332 2333 2334
{
	struct neigh_table *tbl;
	int t, family, s_t;
2335
	int proxy = 0;
2336
	int err;
L
Linus Torvalds 已提交
2337 2338

	read_lock(&neigh_tbl_lock);
2339
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2340 2341 2342 2343 2344 2345 2346 2347

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

L
Linus Torvalds 已提交
2348 2349
	s_t = cb->args[0];

2350
	for (tbl = neigh_tables, t = 0; tbl;
2351
	     tbl = tbl->next, t++) {
L
Linus Torvalds 已提交
2352 2353 2354 2355 2356
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
2357 2358 2359 2360
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
2361 2362
		if (err < 0)
			break;
L
Linus Torvalds 已提交
2363 2364 2365 2366 2367 2368 2369 2370 2371 2372
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}

void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
2373
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
2374

2375 2376 2377
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

2378
	read_lock(&tbl->lock); /* avoid resizes */
2379
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
L
Linus Torvalds 已提交
2380 2381
		struct neighbour *n;

2382 2383 2384
		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
L
Linus Torvalds 已提交
2385 2386
			cb(n, cookie);
	}
2387 2388
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
2389 2390 2391 2392 2393 2394 2395 2396
}
EXPORT_SYMBOL(neigh_for_each);

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
2397
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
2398

2399 2400
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
2401
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2402 2403
		struct neighbour *n;
		struct neighbour __rcu **np;
L
Linus Torvalds 已提交
2404

2405
		np = &nht->hash_buckets[chain];
2406 2407
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
L
Linus Torvalds 已提交
2408 2409 2410 2411 2412
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
2413 2414 2415
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
L
Linus Torvalds 已提交
2416 2417 2418 2419
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
2420 2421
			if (release)
				neigh_cleanup_and_release(n);
L
Linus Torvalds 已提交
2422 2423 2424 2425 2426 2427 2428 2429 2430 2431
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);

#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
2432
	struct net *net = seq_file_net(seq);
2433
	struct neigh_hash_table *nht = state->nht;
L
Linus Torvalds 已提交
2434 2435 2436 2437
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2438
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2439
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
L
Linus Torvalds 已提交
2440 2441

		while (n) {
2442
			if (!net_eq(dev_net(n->dev), net))
2443
				goto next;
L
Linus Torvalds 已提交
2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
2456 2457
next:
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

/*
 * Advance the seq_file iterator from neighbour entry @n to the next one
 * to show.
 *
 * When a state->neigh_sub_iter hook is installed it is consulted first
 * for @n itself; a non-NULL result keeps the iterator on @n.  Otherwise
 * the rest of the current chain and then the following buckets are
 * scanned, skipping entries of other namespaces.  With a hook installed,
 * only entries the hook accepts are returned; without one, entries are
 * filtered by NEIGH_SEQ_SKIP_NOARP / NUD_NOARP.
 *
 * When an entry is found by the scan and @pos is non-NULL, *pos is
 * decremented.  Returns the entry, or NULL at the end of the table.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter && state->neigh_sub_iter(state, n, pos))
		return n;

	n = rcu_dereference_bh(n->next);

	for (;;) {
		for (; n; n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;

			if (state->neigh_sub_iter) {
				if (state->neigh_sub_iter(state, n, pos))
					return n;
				continue;
			}

			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP) ||
			    (n->nud_state & ~NUD_NOARP))
				break;
		}

		if (n)
			break;

		/* chain exhausted: move on to the next bucket, if any */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}

/*
 * Find the neighbour entry at 1-based position *pos by starting at the
 * first entry and stepping with neigh_get_next() until *pos reaches 0.
 *
 * *pos is decremented once for the first entry and further by
 * neigh_get_next(); on failure it holds the remaining count.  Returns the
 * entry, or NULL when the table has fewer entries.
 */
static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (!n)
		return NULL;

	for (--(*pos); *pos; ) {
		n = neigh_get_next(seq, n, pos);
		if (!n)
			return NULL;
	}

	return n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
2534
	struct net *net = seq_file_net(seq);
L
Linus Torvalds 已提交
2535 2536 2537 2538 2539 2540 2541
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
2542
		while (pn && !net_eq(pneigh_net(pn), net))
2543
			pn = pn->next;
L
Linus Torvalds 已提交
2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

/*
 * Advance the seq_file iterator from proxy entry @pn to the next proxy
 * entry in the seq_file's network namespace, continuing into later
 * buckets (tracked in state->bucket) when the current chain ends.
 *
 * When an entry is found and @pos is non-NULL, *pos is decremented.
 * Returns the entry, or NULL after the last bucket.
 */
static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	/* rest of the current chain */
	for (pn = pn->next; pn; pn = pn->next) {
		if (net_eq(pneigh_net(pn), net))
			break;
	}

	/* following buckets */
	while (!pn && ++state->bucket <= PNEIGH_HASHMASK) {
		for (pn = tbl->phash_buckets[state->bucket]; pn; pn = pn->next) {
			if (net_eq(pneigh_net(pn), net))
				break;
		}
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

/*
 * Find the proxy entry at 1-based position *pos by starting at the first
 * proxy entry and stepping with pneigh_get_next() until *pos reaches 0.
 *
 * Returns the entry, or NULL when there are fewer proxy entries; in that
 * case *pos holds the remaining count.
 */
static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (!pn)
		return NULL;

	for (--(*pos); *pos; ) {
		pn = pneigh_get_next(seq, pn, pos);
		if (!pn)
			return NULL;
	}

	return pn;
}

/*
 * Resolve position *pos to an entry, looking at regular neighbour entries
 * first and - unless NEIGH_SEQ_NEIGH_ONLY is set - continuing into the
 * proxy entries with whatever count is left over.
 *
 * Works on a local copy of the position; *pos itself is not modified.
 * Returns the entry found, or NULL.
 */
static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	loff_t remaining = *pos;
	void *entry;

	entry = neigh_get_idx(seq, &remaining);
	if (entry)
		return entry;

	if (state->flags & NEIGH_SEQ_NEIGH_ONLY)
		return NULL;

	return pneigh_get_idx(seq, &remaining);
}

/*
 * seq_file ->start helper for neighbour tables.
 *
 * Initialises the iterator state for @tbl (NEIGH_SEQ_IS_PNEIGH is masked
 * out of @neigh_seq_flags), enters an rcu_read_lock_bh() section and
 * snapshots the current hash table pointer into state->nht.
 *
 * Returns SEQ_START_TOKEN for position 0, otherwise the entry at *pos or
 * NULL.  The RCU-bh section is left held on return.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	if (*pos == 0)
		return SEQ_START_TOKEN;

	return neigh_get_idx_any(seq, pos);
}
EXPORT_SYMBOL(neigh_seq_start);

/*
 * seq_file ->next helper for neighbour tables.
 *
 * From SEQ_START_TOKEN the first regular entry is returned.  While on
 * regular entries the iterator steps through them and, once they are
 * exhausted and NEIGH_SEQ_NEIGH_ONLY is not set, switches to the first
 * proxy entry.  While on proxy entries it steps through those.
 *
 * *pos is always incremented.  Returns the next entry or NULL.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
	} else if (state->flags & NEIGH_SEQ_IS_PNEIGH) {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	} else {
		rc = neigh_get_next(seq, v, NULL);
		if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	}

	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
2652
	__releases(rcu_bh)
L
Linus Torvalds 已提交
2653
{
2654
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
2655 2656 2657 2658 2659 2660 2661
}
EXPORT_SYMBOL(neigh_seq_stop);

/* statistics via seq_file */

/*
 * seq_file ->start for the per-CPU statistics file.
 *
 * Position 0 yields SEQ_START_TOKEN (the header line).  Position p > 0
 * maps to CPU p - 1; impossible CPU ids are skipped and *pos is updated
 * to one past the CPU actually chosen.
 *
 * Returns that CPU's statistics block, or NULL past the last CPU.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (cpu_possible(cpu)) {
			*pos = cpu+1;
			return per_cpu_ptr(tbl->stats, cpu);
		}
	}
	return NULL;
}

/*
 * seq_file ->next for the per-CPU statistics file.
 *
 * Starting at CPU id *pos, picks the first possible CPU, stores its id
 * plus one in *pos and returns its statistics block.  Returns NULL when
 * no further CPU exists; *pos is left unchanged in that case.
 */
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (cpu_possible(cpu)) {
			*pos = cpu+1;
			return per_cpu_ptr(tbl->stats, cpu);
		}
	}
	return NULL;
}

/*
 * seq_file ->stop for the per-CPU statistics file.  Intentionally empty:
 * the matching ->start takes no lock and allocates nothing.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
A
Alexey Dobriyan 已提交
2698
	struct neigh_table *tbl = seq->private;
L
Linus Torvalds 已提交
2699 2700 2701
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
2702
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
L
Linus Torvalds 已提交
2703 2704 2705 2706
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2707
			"%08lx %08lx  %08lx %08lx %08lx\n",
L
Linus Torvalds 已提交
2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
2723 2724
		   st->forced_gc_runs,
		   st->unres_discards
L
Linus Torvalds 已提交
2725 2726 2727 2728 2729
		   );

	return 0;
}

2730
static const struct seq_operations neigh_stat_seq_ops = {
L
Linus Torvalds 已提交
2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};

static int neigh_stat_seq_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &neigh_stat_seq_ops);

	if (!ret) {
		struct seq_file *sf = file->private_data;
A
Al Viro 已提交
2743
		sf->private = PDE_DATA(inode);
L
Linus Torvalds 已提交
2744 2745 2746 2747
	}
	return ret;
};

2748
static const struct file_operations neigh_stat_seq_fops = {
L
Linus Torvalds 已提交
2749 2750 2751 2752 2753 2754 2755 2756 2757
	.owner	 = THIS_MODULE,
	.open 	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#endif /* CONFIG_PROC_FS */

2758 2759 2760 2761 2762 2763 2764 2765 2766
/*
 * Buffer size used by __neigh_notify() for a single neighbour
 * notification: the ndmsg header plus one attribute of each kind, with
 * both address attributes budgeted at MAX_ADDR_LEN.
 */
static inline size_t neigh_nlmsg_size(void)
{
	size_t len = NLMSG_ALIGN(sizeof(struct ndmsg));

	len += nla_total_size(MAX_ADDR_LEN);			/* NDA_DST */
	len += nla_total_size(MAX_ADDR_LEN);			/* NDA_LLADDR */
	len += nla_total_size(sizeof(struct nda_cacheinfo));	/* NDA_CACHEINFO */
	len += nla_total_size(4);				/* NDA_PROBES */

	return len;
}

2767
static void __neigh_notify(struct neighbour *n, int type, int flags)
L
Linus Torvalds 已提交
2768
{
2769
	struct net *net = dev_net(n->dev);
2770
	struct sk_buff *skb;
2771
	int err = -ENOBUFS;
L
Linus Torvalds 已提交
2772

2773
	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2774
	if (skb == NULL)
2775
		goto errout;
L
Linus Torvalds 已提交
2776

2777
	err = neigh_fill_info(skb, n, 0, 0, type, flags);
2778 2779 2780 2781 2782 2783
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
2784 2785
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
2786 2787
errout:
	if (err < 0)
2788
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
L
Linus Torvalds 已提交
2789 2790
}

2791
void neigh_app_ns(struct neighbour *n)
L
Linus Torvalds 已提交
2792
{
2793 2794
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
2795
EXPORT_SYMBOL(neigh_app_ns);
L
Linus Torvalds 已提交
2796 2797

#ifdef CONFIG_SYSCTL
2798
static int zero;
2799
static int int_max = INT_MAX;
2800
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
L
Linus Torvalds 已提交
2801

2802 2803
/*
 * sysctl handler that exposes a byte-denominated queue limit as a packet
 * count.
 *
 * The stored value is divided by SKB_TRUESIZE(ETH_FRAME_LEN) before being
 * handed to proc_dointvec_minmax() through a temporary table entry bounded
 * to [0, unres_qlen_max]; a successfully written count is multiplied back
 * into bytes and stored.
 *
 * Returns the result of proc_dointvec_minmax().
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int *bytes = ctl->data;
	int pkts;
	int ret;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &pkts;

	pkts = *bytes / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*bytes = pkts * SKB_TRUESIZE(ETH_FRAME_LEN);

	return ret;
}

2820 2821 2822
static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
2823 2824
	switch (family) {
	case AF_INET:
2825
		return __in_dev_arp_parms_get_rcu(dev);
2826 2827 2828
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852
	return NULL;
}

/*
 * Propagate the default value p->data[@index] to every device in @net
 * whose parameter set (of the same family as @p) does not have the
 * corresponding bit set in data_state.  Runs under rcu_read_lock().
 */
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	int family = neigh_parms_family(p);
	struct net_device *dev;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dev_p;

		dev_p = neigh_get_dev_parms_rcu(dev, family);
		if (!dev_p)
			continue;

		/* a set bit marks a value that was written explicitly */
		if (test_bit(index, dev_p->data_state))
			continue;

		dev_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}

/*
 * Post-processing shared by the neigh sysctl handlers.
 *
 * Does nothing for reads.  For writes, marks the touched value (located
 * by the offset of ctl->data inside p->data) in p->data_state and, when
 * the entry has no device attached (ctl->extra1 is NULL, i.e. it is the
 * default parameter set), copies the value to the devices via
 * neigh_copy_dflt_parms().
 */
static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	int index = (int *) ctl->data - p->data;

	if (write) {
		set_bit(index, p->data_state);
		if (!dev) /* NULL dev means this is default value */
			neigh_copy_dflt_parms(net, p, index);
	}
}

J
Jiri Pirko 已提交
2864 2865 2866 2867 2868
/*
 * sysctl handler for integers restricted to [0, INT_MAX].
 *
 * Runs proc_dointvec_minmax() on a copy of @ctl whose extra1/extra2 are
 * replaced by the bounds (the original fields carry the device and
 * parameter set), then calls neigh_proc_update() with the original entry.
 *
 * Returns the result of proc_dointvec_minmax().
 */
static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
					   void __user *buffer,
					   size_t *lenp, loff_t *ppos)
{
	struct ctl_table bounded = *ctl;
	int ret;

	bounded.extra1 = &zero;
	bounded.extra2 = &int_max;

	ret = proc_dointvec_minmax(&bounded, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);

	return ret;
}

J
Jiri Pirko 已提交
2879 2880 2881
/*
 * proc_dointvec() followed by neigh_proc_update().
 * Returns the result of proc_dointvec().
 */
int neigh_proc_dointvec(struct ctl_table *ctl, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);

	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);

/*
 * proc_dointvec_jiffies() followed by neigh_proc_update().
 * Returns the result of proc_dointvec_jiffies().
 */
int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
				void __user *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);

	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);

/*
 * proc_dointvec_userhz_jiffies() followed by neigh_proc_update().
 * Returns the result of proc_dointvec_userhz_jiffies().
 */
static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
					      void __user *buffer,
					      size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);

	return ret;
}

/*
 * proc_dointvec_ms_jiffies() followed by neigh_proc_update().
 * Returns the result of proc_dointvec_ms_jiffies().
 */
int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
				   void __user *buffer,
				   size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);

	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);

/*
 * proc_unres_qlen() followed by neigh_proc_update().
 * Returns the result of proc_unres_qlen().
 */
static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);

	return ret;
}

J
Jiri Pirko 已提交
2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946
/*
 * Address of data[index] inside a struct neigh_parms located at address
 * zero, i.e. the member's offset expressed as a pointer.
 * NOTE(review): presumably rebased onto a real neigh_parms when the table
 * is registered - that code is not visible from here.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/*
 * Designated initializer for slot NEIGH_VAR_<attr> of a ctl_table array.
 * The entry is named @name, has mode @mval, is handled by @proc and
 * points at the storage of NEIGH_VAR_<data_attr>.
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

/* Entries backed by their own storage slot; all are mode 0644. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* Entries that present another entry's storage (data_attr) in a different unit. */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
2960

L
Linus Torvalds 已提交
2961 2962
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
E
Eric Dumazet 已提交
2963
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2964
} neigh_sysctl_template __read_mostly = {
L
Linus Torvalds 已提交
2965
	.neigh_vars = {
J
Jiri Pirko 已提交
2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
E
Eric Dumazet 已提交
2981
		[NEIGH_VAR_GC_INTERVAL] = {
L
Linus Torvalds 已提交
2982 2983 2984
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
2985
			.proc_handler	= proc_dointvec_jiffies,
L
Linus Torvalds 已提交
2986
		},
E
Eric Dumazet 已提交
2987
		[NEIGH_VAR_GC_THRESH1] = {
L
Linus Torvalds 已提交
2988 2989 2990
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
2991 2992 2993
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
L
Linus Torvalds 已提交
2994
		},
E
Eric Dumazet 已提交
2995
		[NEIGH_VAR_GC_THRESH2] = {
L
Linus Torvalds 已提交
2996 2997 2998
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
2999 3000 3001
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
L
Linus Torvalds 已提交
3002
		},
E
Eric Dumazet 已提交
3003
		[NEIGH_VAR_GC_THRESH3] = {
L
Linus Torvalds 已提交
3004 3005 3006
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
3007 3008 3009
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
L
Linus Torvalds 已提交
3010
		},
3011
		{},
L
Linus Torvalds 已提交
3012 3013 3014 3015
	},
};

int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3016
			  proc_handler *handler)
L
Linus Torvalds 已提交
3017
{
J
Jiri Pirko 已提交
3018
	int i;
3019
	struct neigh_sysctl_table *t;
J
Jiri Pirko 已提交
3020
	const char *dev_name_source;
3021
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3022
	char *p_name;
L
Linus Torvalds 已提交
3023

3024
	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
L
Linus Torvalds 已提交
3025
	if (!t)
3026 3027
		goto err;

J
Jiri Pirko 已提交
3028
	for (i = 0; i < ARRAY_SIZE(t->neigh_vars); i++) {
J
Jiri Pirko 已提交
3029
		t->neigh_vars[i].data += (long) p;
J
Jiri Pirko 已提交
3030
		t->neigh_vars[i].extra1 = dev;
3031
		t->neigh_vars[i].extra2 = p;
J
Jiri Pirko 已提交
3032
	}
L
Linus Torvalds 已提交
3033 3034 3035

	if (dev) {
		dev_name_source = dev->name;
3036
		/* Terminate the table early */
E
Eric Dumazet 已提交
3037 3038
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
L
Linus Torvalds 已提交
3039
	} else {
3040
		dev_name_source = "default";
E
Eric Dumazet 已提交
3041 3042 3043 3044
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
L
Linus Torvalds 已提交
3045 3046
	}

3047
	if (handler) {
L
Linus Torvalds 已提交
3048
		/* RetransTime */
E
Eric Dumazet 已提交
3049
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
L
Linus Torvalds 已提交
3050
		/* ReachableTime */
E
Eric Dumazet 已提交
3051
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
L
Linus Torvalds 已提交
3052
		/* RetransTime (in milliseconds)*/
E
Eric Dumazet 已提交
3053
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
L
Linus Torvalds 已提交
3054
		/* ReachableTime (in milliseconds) */
E
Eric Dumazet 已提交
3055
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
L
Linus Torvalds 已提交
3056 3057
	}

3058 3059 3060 3061
	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072
	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

3073 3074
	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
3075
	t->sysctl_header =
3076
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3077
	if (!t->sysctl_header)
3078
		goto free;
3079

L
Linus Torvalds 已提交
3080 3081 3082
	p->sysctl_table = t;
	return 0;

3083
free:
L
Linus Torvalds 已提交
3084
	kfree(t);
3085 3086
err:
	return -ENOBUFS;
L
Linus Torvalds 已提交
3087
}
3088
EXPORT_SYMBOL(neigh_sysctl_register);
L
Linus Torvalds 已提交
3089 3090 3091 3092 3093 3094

void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
3095
		unregister_net_sysctl_table(t->sysctl_header);
L
Linus Torvalds 已提交
3096 3097 3098
		kfree(t);
	}
}
3099
EXPORT_SYMBOL(neigh_sysctl_unregister);
L
Linus Torvalds 已提交
3100 3101 3102

#endif	/* CONFIG_SYSCTL */

3103 3104
/*
 * Hook the neighbour and neighbour-table rtnetlink message types up to their
 * handlers in this file.  Registered through subsys_initcall(); always
 * returns 0.
 */
static int __init neigh_init(void)
{
	/* Neighbour entries: add, delete, dump */
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	/* Neighbour tables: dump, set */
	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL,
		      NULL, neightbl_dump_info, NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);