neighbour.c 79.1 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

J
Joe Perches 已提交
18 19
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

20
#include <linux/slab.h>
L
Linus Torvalds 已提交
21 22 23 24 25 26 27 28 29 30
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
31
#include <net/net_namespace.h>
L
Linus Torvalds 已提交
32 33 34
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
35
#include <net/netevent.h>
36
#include <net/netlink.h>
L
Linus Torvalds 已提交
37 38
#include <linux/rtnetlink.h>
#include <linux/random.h>
39
#include <linux/string.h>
40
#include <linux/log2.h>
41
#include <linux/inetdevice.h>
42
#include <net/addrconf.h>
L
Linus Torvalds 已提交
43

44
#define DEBUG
L
Linus Torvalds 已提交
45
#define NEIGH_DEBUG 1
46 47 48 49 50
/*
 * Emit a debug message through pr_debug() when @level does not exceed the
 * compile-time NEIGH_DEBUG verbosity.
 *
 * @level is parenthesized so that an expression argument (for example a
 * conditional expression) is compared against NEIGH_DEBUG as a whole.
 */
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if ((level) <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)
L
Linus Torvalds 已提交
51 52 53 54

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
55 56 57
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
L
Linus Torvalds 已提交
58 59
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

60
#ifdef CONFIG_PROC_FS
61
static const struct file_operations neigh_stat_seq_fops;
62
#endif
L
Linus Torvalds 已提交
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans/updates of hash buckets MUST be done under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send anything to the network.
     Doing so will result in deadlocks if a backend/driver wants to use
     the neighbour cache.
   - If an entry requires some non-trivial action, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes the link-layer address data and its
   validity state. However, the same lock is also used to protect other
   entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
 */

92
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
93 94 95 96 97
{
	kfree_skb(skb);
	return -ENETDOWN;
}

98 99 100 101 102
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

103
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
104
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
105 106 107
	neigh_release(neigh);
}

L
Linus Torvalds 已提交
108 109 110 111 112 113 114 115
/*
 * Return a random value distributed over the interval
 * (1/2)*base ... (3/2)*base, or 0 when @base is 0.
 *
 * This corresponds to the default IPv6 settings and is deliberately not
 * overridable, because it is a really reasonable choice.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (prandom_u32() % base) + (base >> 1);
}
118
EXPORT_SYMBOL(neigh_rand_reach_time);
L
Linus Torvalds 已提交
119 120 121 122 123 124


static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
125
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
126 127 128 129

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
130 131
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
132
	for (i = 0; i < (1 << nht->hash_shift); i++) {
133 134
		struct neighbour *n;
		struct neighbour __rcu **np;
L
Linus Torvalds 已提交
135

136
		np = &nht->hash_buckets[i];
137 138
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
L
Linus Torvalds 已提交
139 140 141 142 143 144 145
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
146 147 148
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
L
Linus Torvalds 已提交
149 150 151
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
152
				neigh_cleanup_and_release(n);
L
Linus Torvalds 已提交
153 154 155 156 157 158 159 160 161 162 163 164 165 166
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

167 168 169 170 171 172 173 174 175 176
/*
 * Take a reference on @n and arm its timer to fire at @when.
 * A non-zero return from mod_timer() is reported as a "double timer add"
 * bug together with a stack dump.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);

	if (likely(!mod_timer(&n->timer, when)))
		return;

	printk("NEIGH: BUG, double timer add, state is %x\n",
	       n->nud_state);
	dump_stack();
}

L
Linus Torvalds 已提交
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
/*
 * Cancel the timer of @n if the entry is in a timer-driven state and
 * del_timer() succeeds; in that case the reference on @n is dropped too.
 *
 * Returns 1 if the timer was deleted, 0 otherwise.
 */
static int neigh_del_timer(struct neighbour *n)
{
	if (!(n->nud_state & NUD_IN_TIMER))
		return 0;

	if (!del_timer(&n->timer))
		return 0;

	neigh_release(n);
	return 1;
}

/*
 * Drain @list: for every queued packet drop the device reference held
 * through skb->dev and free the packet.
 */
static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	for (skb = skb_dequeue(list); skb != NULL; skb = skb_dequeue(list)) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

197
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
L
Linus Torvalds 已提交
198 199
{
	int i;
200
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
201

202 203 204
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

205
	for (i = 0; i < (1 << nht->hash_shift); i++) {
206 207
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];
L
Linus Torvalds 已提交
208

209 210
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
L
Linus Torvalds 已提交
211 212 213 214
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
215 216 217
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
L
Linus Torvalds 已提交
218 219 220 221 222 223 224 225 226 227 228 229 230 231
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
232
				__skb_queue_purge(&n->arp_queue);
E
Eric Dumazet 已提交
233
				n->arp_queue_len_bytes = 0;
L
Linus Torvalds 已提交
234 235 236 237 238
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
239
				neigh_dbg(2, "neigh %p is stray\n", n);
L
Linus Torvalds 已提交
240 241
			}
			write_unlock(&n->lock);
242
			neigh_cleanup_and_release(n);
L
Linus Torvalds 已提交
243 244
		}
	}
245
}
L
Linus Torvalds 已提交
246

247 248 249 250 251 252
/*
 * Flush the neighbour entries of @dev from @tbl (all entries when @dev
 * is NULL), holding tbl->lock for writing with BH disabled.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);

	neigh_flush_dev(tbl, dev);

	write_unlock_bh(&tbl->lock);
}
253
EXPORT_SYMBOL(neigh_changeaddr);
254 255 256 257 258

/*
 * Tear down everything @tbl holds for @dev: the neighbour entries and
 * the proxy entries are removed under tbl->lock; afterwards the proxy
 * timer is stopped synchronously and the proxy queue is purged.
 *
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);

	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);

	write_unlock_bh(&tbl->lock);

	/* Done outside the table lock. */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);

	return 0;
}
266
EXPORT_SYMBOL(neigh_ifdown);
L
Linus Torvalds 已提交
267

268
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
L
Linus Torvalds 已提交
269 270 271 272 273 274 275 276 277 278
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
279 280 281 282
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
L
Linus Torvalds 已提交
283
			goto out_entries;
284
		}
L
Linus Torvalds 已提交
285 286
	}

287
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
L
Linus Torvalds 已提交
288 289 290
	if (!n)
		goto out_entries;

291
	__skb_queue_head_init(&n->arp_queue);
L
Linus Torvalds 已提交
292
	rwlock_init(&n->lock);
293
	seqlock_init(&n->ha_lock);
L
Linus Torvalds 已提交
294 295 296
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
297
	seqlock_init(&n->hh.hh_lock);
L
Linus Torvalds 已提交
298
	n->parms	  = neigh_parms_clone(&tbl->parms);
299
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
L
Linus Torvalds 已提交
300 301 302 303 304 305 306 307 308 309 310 311 312

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

313 314
/*
 * Store a random 32-bit hash seed in *@x. Bit 0 is forced on, so the
 * stored value is always odd (and therefore never zero).
 */
static void neigh_get_hash_rnd(u32 *x)
{
	u32 seed = get_random_u32();

	*x = seed | 1;
}

318
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
L
Linus Torvalds 已提交
319
{
320
	size_t size = (1 << shift) * sizeof(struct neighbour *);
321
	struct neigh_hash_table *ret;
E
Eric Dumazet 已提交
322
	struct neighbour __rcu **buckets;
323
	int i;
L
Linus Torvalds 已提交
324

325 326 327 328 329 330
	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
E
Eric Dumazet 已提交
331
		buckets = (struct neighbour __rcu **)
332 333 334 335 336
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
L
Linus Torvalds 已提交
337
	}
E
Eric Dumazet 已提交
338
	ret->hash_buckets = buckets;
339
	ret->hash_shift = shift;
340 341
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
L
Linus Torvalds 已提交
342 343 344
	return ret;
}

345
static void neigh_hash_free_rcu(struct rcu_head *head)
L
Linus Torvalds 已提交
346
{
347 348 349
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
350
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
E
Eric Dumazet 已提交
351
	struct neighbour __rcu **buckets = nht->hash_buckets;
L
Linus Torvalds 已提交
352 353

	if (size <= PAGE_SIZE)
354
		kfree(buckets);
L
Linus Torvalds 已提交
355
	else
356 357
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
L
Linus Torvalds 已提交
358 359
}

360
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
361
						unsigned long new_shift)
L
Linus Torvalds 已提交
362
{
363 364
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;
L
Linus Torvalds 已提交
365 366 367

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

368 369
	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
370
	new_nht = neigh_hash_alloc(new_shift);
371 372
	if (!new_nht)
		return old_nht;
L
Linus Torvalds 已提交
373

374
	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
L
Linus Torvalds 已提交
375 376
		struct neighbour *n, *next;

377 378
		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
379 380 381 382
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);
L
Linus Torvalds 已提交
383

384
			hash >>= (32 - new_nht->hash_shift);
385 386 387 388 389 390 391 392
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
L
Linus Torvalds 已提交
393 394 395
		}
	}

396 397 398
	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
L
Linus Torvalds 已提交
399 400 401 402 403 404
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
405

L
Linus Torvalds 已提交
406 407
	NEIGH_CACHE_STAT_INC(tbl, lookups);

408
	rcu_read_lock_bh();
409 410 411 412 413
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!atomic_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
L
Linus Torvalds 已提交
414
	}
415

416
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
417 418
	return n;
}
419
EXPORT_SYMBOL(neigh_lookup);
L
Linus Torvalds 已提交
420

421 422
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
L
Linus Torvalds 已提交
423 424 425
{
	struct neighbour *n;
	int key_len = tbl->key_len;
426
	u32 hash_val;
427
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
428 429 430

	NEIGH_CACHE_STAT_INC(tbl, lookups);

431 432
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
433
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
434 435 436 437

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
438
		if (!memcmp(n->primary_key, pkey, key_len) &&
439
		    net_eq(dev_net(n->dev), net)) {
440 441
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
L
Linus Torvalds 已提交
442 443 444 445
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}
446

447
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
448 449
	return n;
}
450
EXPORT_SYMBOL(neigh_lookup_nodev);
L
Linus Torvalds 已提交
451

452 453
/*
 * Create the neighbour entry for (@pkey, @dev) and insert it into @tbl,
 * or return the entry that already exists for that pair.
 *
 * A fresh entry is allocated and run through the protocol constructor,
 * the device's ndo_neigh_construct hook and the per-parms neigh_setup
 * hook (each optional). Under tbl->lock the hash table is grown when the
 * entry count exceeds the bucket count, and the chain is searched for a
 * duplicate; if one is found the fresh entry is released and the existing
 * one is returned instead.
 *
 * When @want_ref is true, an extra reference is taken on the returned
 * entry.
 *
 * Returns the entry, or an ERR_PTR(): -ENOBUFS if allocation failed, the
 * negative error of a failing constructor/setup hook, or -EINVAL if the
 * entry's parms are marked dead.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	struct neigh_hash_table *nht;
	struct neighbour *n1, *rc, *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	int error;

	n = neigh_alloc(tbl, dev);
	if (!n)
		return ERR_PTR(-ENOBUFS);

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor) {
		error = tbl->constructor(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup) {
		error = n->parms->neigh_setup(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Somebody may have inserted the same entry in the meantime. */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev != n1->dev || memcmp(n1->primary_key, pkey, key_len))
			continue;

		if (want_ref)
			neigh_hold(n1);
		rc = n1;
		goto out_tbl_unlock;
	}

	/* Publish the new entry at the head of its chain. */
	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);

	return n;

out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	return rc;
}
538
EXPORT_SYMBOL(__neigh_create);
L
Linus Torvalds 已提交
539

540
static u32 pneigh_hash(const void *pkey, int key_len)
541 542 543 544 545 546
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
547 548
	return hash_val;
}
549

550 551 552 553 554 555 556
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
557
		if (!memcmp(n->key, pkey, key_len) &&
558
		    net_eq(pneigh_net(n), net) &&
559
		    (n->dev == dev || !n->dev))
560 561
			return n;
		n = n->next;
562
	}
563 564
	return NULL;
}
565

566 567 568 569 570 571 572 573
/*
 * Find the proxy entry for (@net, @pkey, @dev) in @tbl. This function
 * takes no lock itself.
 *
 * Returns the entry or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val;

	hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
575
EXPORT_SYMBOL_GPL(__pneigh_lookup);
576

577 578
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
L
Linus Torvalds 已提交
579 580 581 582
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
583
	u32 hash_val = pneigh_hash(pkey, key_len);
L
Linus Torvalds 已提交
584 585

	read_lock_bh(&tbl->lock);
586 587
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
L
Linus Torvalds 已提交
588
	read_unlock_bh(&tbl->lock);
589 590

	if (n || !creat)
L
Linus Torvalds 已提交
591 592
		goto out;

593 594
	ASSERT_RTNL();

L
Linus Torvalds 已提交
595 596 597 598
	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

599
	write_pnet(&n->net, net);
L
Linus Torvalds 已提交
600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
620
EXPORT_SYMBOL(pneigh_lookup);
L
Linus Torvalds 已提交
621 622


623
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
L
Linus Torvalds 已提交
624 625 626 627
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
628
	u32 hash_val = pneigh_hash(pkey, key_len);
L
Linus Torvalds 已提交
629 630 631 632

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
633
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
634
		    net_eq(pneigh_net(n), net)) {
L
Linus Torvalds 已提交
635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

/*
 * Remove every proxy entry of @tbl that belongs to @dev, or every proxy
 * entry when @dev is NULL. For each removed entry the optional
 * tbl->pdestructor is run, the device reference is dropped and the entry
 * is freed. This function takes no lock itself.
 *
 * Always returns -ENOENT.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			/* Entry of another device: keep it. */
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}

			*np = n->next;
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
		}
	}

	return -ENOENT;
}

672 673 674 675 676 677 678
static void neigh_parms_destroy(struct neigh_parms *parms);

/* Drop one reference on @parms; destroy it when that was the last one. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (!atomic_dec_and_test(&parms->refcnt))
		return;

	neigh_parms_destroy(parms);
}
L
Linus Torvalds 已提交
679 680 681 682 683 684 685

/*
 *	neighbour must already be out of the table;
 *
 */
void neigh_destroy(struct neighbour *neigh)
{
686 687
	struct net_device *dev = neigh->dev;

L
Linus Torvalds 已提交
688 689 690
	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
J
Joe Perches 已提交
691
		pr_warn("Destroying alive neighbour %p\n", neigh);
L
Linus Torvalds 已提交
692 693 694 695 696
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
J
Joe Perches 已提交
697
		pr_warn("Impossible event\n");
L
Linus Torvalds 已提交
698

699 700 701
	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
E
Eric Dumazet 已提交
702
	neigh->arp_queue_len_bytes = 0;
L
Linus Torvalds 已提交
703

704
	if (dev->netdev_ops->ndo_neigh_destroy)
705
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
706

707
	dev_put(dev);
L
Linus Torvalds 已提交
708 709
	neigh_parms_put(neigh->parms);

710
	neigh_dbg(2, "neigh %p is destroyed\n", neigh);
L
Linus Torvalds 已提交
711 712

	atomic_dec(&neigh->tbl->entries);
713
	kfree_rcu(neigh, rcu);
L
Linus Torvalds 已提交
714
}
715
EXPORT_SYMBOL(neigh_destroy);
L
Linus Torvalds 已提交
716 717 718 719 720 721 722 723

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
724
	neigh_dbg(2, "neigh %p is suspected\n", neigh);
L
Linus Torvalds 已提交
725 726 727 728 729 730 731 732 733 734 735

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
736
	neigh_dbg(2, "neigh %p is connected\n", neigh);
L
Linus Torvalds 已提交
737 738 739 740

	neigh->output = neigh->ops->connected_output;
}

741
static void neigh_periodic_work(struct work_struct *work)
L
Linus Torvalds 已提交
742
{
743
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
744 745
	struct neighbour *n;
	struct neighbour __rcu **np;
746
	unsigned int i;
747
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
748 749 750

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

751
	write_lock_bh(&tbl->lock);
752 753
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
L
Linus Torvalds 已提交
754 755 756 757 758

	/*
	 *	periodically recompute ReachableTime from random function
	 */

759
	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
L
Linus Torvalds 已提交
760
		struct neigh_parms *p;
761
		tbl->last_rand = jiffies;
762
		list_for_each_entry(p, &tbl->parms_list, list)
L
Linus Torvalds 已提交
763
			p->reachable_time =
J
Jiri Pirko 已提交
764
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
L
Linus Torvalds 已提交
765 766
	}

767 768 769
	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

770
	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
771
		np = &nht->hash_buckets[i];
L
Linus Torvalds 已提交
772

773 774
		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
775
			unsigned int state;
L
Linus Torvalds 已提交
776

777
			write_lock(&n->lock);
L
Linus Torvalds 已提交
778

779 780 781 782 783
			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}
L
Linus Torvalds 已提交
784

785 786
			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;
L
Linus Torvalds 已提交
787

788 789
			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
J
Jiri Pirko 已提交
790
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
791 792 793 794 795 796
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
L
Linus Torvalds 已提交
797 798 799
			write_unlock(&n->lock);

next_elt:
800 801 802 803 804 805 806 807 808
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
809 810
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
L
Linus Torvalds 已提交
811
	}
812
out:
J
Jiri Pirko 已提交
813 814 815
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
L
Linus Torvalds 已提交
816
	 */
817
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
J
Jiri Pirko 已提交
818
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
819
	write_unlock_bh(&tbl->lock);
L
Linus Torvalds 已提交
820 821 822 823 824
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
825 826 827
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
L
Linus Torvalds 已提交
828 829
}

830
static void neigh_invalidate(struct neighbour *neigh)
E
Eric Dumazet 已提交
831 832
	__releases(neigh->lock)
	__acquires(neigh->lock)
833 834 835 836
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
837
	neigh_dbg(2, "neigh %p is failed\n", neigh);
838 839 840 841 842 843 844 845 846 847 848 849 850
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
851
	__skb_queue_purge(&neigh->arp_queue);
E
Eric Dumazet 已提交
852
	neigh->arp_queue_len_bytes = 0;
853 854
}

E
Eric Dumazet 已提交
855 856 857
/*
 * Send one solicitation for @neigh via the optional ops->solicit hook,
 * handing it a clone of the most recently queued packet (or NULL if the
 * queue is empty or the clone failed), and count the probe.
 *
 * Entered with neigh->lock held for writing; the lock is released here
 * before the solicit hook runs.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb;

	skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb != NULL)
		skb = skb_clone(skb, GFP_ATOMIC);

	write_unlock(&neigh->lock);

	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);

	kfree_skb(skb);
}

L
Linus Torvalds 已提交
869 870 871 872 873 874
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
875
	unsigned int state;
L
Linus Torvalds 已提交
876 877 878 879 880 881 882 883
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

884
	if (!(state & NUD_IN_TIMER))
L
Linus Torvalds 已提交
885 886 887
		goto out;

	if (state & NUD_REACHABLE) {
888
		if (time_before_eq(now,
L
Linus Torvalds 已提交
889
				   neigh->confirmed + neigh->parms->reachable_time)) {
890
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
L
Linus Torvalds 已提交
891 892
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
J
Jiri Pirko 已提交
893 894
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
895
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
L
Linus Torvalds 已提交
896
			neigh->nud_state = NUD_DELAY;
897
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
898
			neigh_suspect(neigh);
J
Jiri Pirko 已提交
899
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
L
Linus Torvalds 已提交
900
		} else {
901
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
L
Linus Torvalds 已提交
902
			neigh->nud_state = NUD_STALE;
903
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
904
			neigh_suspect(neigh);
905
			notify = 1;
L
Linus Torvalds 已提交
906 907
		}
	} else if (state & NUD_DELAY) {
908
		if (time_before_eq(now,
J
Jiri Pirko 已提交
909 910
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
911
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
L
Linus Torvalds 已提交
912
			neigh->nud_state = NUD_REACHABLE;
913
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
914
			neigh_connect(neigh);
915
			notify = 1;
L
Linus Torvalds 已提交
916 917
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
918
			neigh_dbg(2, "neigh %p is probed\n", neigh);
L
Linus Torvalds 已提交
919
			neigh->nud_state = NUD_PROBE;
920
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
921
			atomic_set(&neigh->probes, 0);
922
			notify = 1;
J
Jiri Pirko 已提交
923
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
L
Linus Torvalds 已提交
924 925 926
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
J
Jiri Pirko 已提交
927
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
L
Linus Torvalds 已提交
928 929 930 931 932 933
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
934
		neigh_invalidate(neigh);
935
		goto out;
L
Linus Torvalds 已提交
936 937 938 939 940
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
941 942
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
L
Linus Torvalds 已提交
943 944
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
E
Eric Dumazet 已提交
945
		neigh_probe(neigh);
946
	} else {
947
out:
948 949
		write_unlock(&neigh->lock);
	}
T
Thomas Graf 已提交
950

951
	if (notify)
952
		neigh_update_notify(neigh, 0);
L
Linus Torvalds 已提交
953 954 955 956 957 958 959

	neigh_release(neigh);
}

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
E
Eric Dumazet 已提交
960
	bool immediate_probe = false;
L
Linus Torvalds 已提交
961 962 963 964 965 966

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
967 968
	if (neigh->dead)
		goto out_dead;
L
Linus Torvalds 已提交
969 970

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
J
Jiri Pirko 已提交
971 972
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
E
Eric Dumazet 已提交
973 974
			unsigned long next, now = jiffies;

J
Jiri Pirko 已提交
975 976
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
L
Linus Torvalds 已提交
977
			neigh->nud_state     = NUD_INCOMPLETE;
E
Eric Dumazet 已提交
978
			neigh->updated = now;
J
Jiri Pirko 已提交
979 980
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
E
Eric Dumazet 已提交
981 982
			neigh_add_timer(neigh, next);
			immediate_probe = true;
L
Linus Torvalds 已提交
983 984
		} else {
			neigh->nud_state = NUD_FAILED;
985
			neigh->updated = jiffies;
L
Linus Torvalds 已提交
986 987
			write_unlock_bh(&neigh->lock);

988
			kfree_skb(skb);
L
Linus Torvalds 已提交
989 990 991
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
992
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
L
Linus Torvalds 已提交
993
		neigh->nud_state = NUD_DELAY;
994
		neigh->updated = jiffies;
J
Jiri Pirko 已提交
995 996
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
L
Linus Torvalds 已提交
997 998 999 1000
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
E
Eric Dumazet 已提交
1001
			while (neigh->arp_queue_len_bytes + skb->truesize >
J
Jiri Pirko 已提交
1002
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
L
Linus Torvalds 已提交
1003
				struct sk_buff *buff;
E
Eric Dumazet 已提交
1004

1005
				buff = __skb_dequeue(&neigh->arp_queue);
E
Eric Dumazet 已提交
1006 1007 1008
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
L
Linus Torvalds 已提交
1009
				kfree_skb(buff);
1010
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
L
Linus Torvalds 已提交
1011
			}
E
Eric Dumazet 已提交
1012
			skb_dst_force(skb);
L
Linus Torvalds 已提交
1013
			__skb_queue_tail(&neigh->arp_queue, skb);
E
Eric Dumazet 已提交
1014
			neigh->arp_queue_len_bytes += skb->truesize;
L
Linus Torvalds 已提交
1015 1016 1017 1018
		}
		rc = 1;
	}
out_unlock_bh:
E
Eric Dumazet 已提交
1019 1020 1021 1022 1023
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
L
Linus Torvalds 已提交
1024
	return rc;
1025 1026 1027 1028 1029 1030 1031

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
L
Linus Torvalds 已提交
1032
}
1033
EXPORT_SYMBOL(__neigh_event_send);
L
Linus Torvalds 已提交
1034

1035
static void neigh_update_hhs(struct neighbour *neigh)
L
Linus Torvalds 已提交
1036 1037
{
	struct hh_cache *hh;
1038
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
D
Doug Kehn 已提交
1039 1040 1041 1042
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;
L
Linus Torvalds 已提交
1043 1044

	if (update) {
1045 1046
		hh = &neigh->hh;
		if (hh->hh_len) {
1047
			write_seqlock_bh(&hh->hh_lock);
L
Linus Torvalds 已提交
1048
			update(hh, neigh->dev, neigh->ha);
1049
			write_sequnlock_bh(&hh->hh_lock);
L
Linus Torvalds 已提交
1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062
		}
	}
}



/* Generic update routine.
 *
 * @neigh:     entry to update; the caller MUST hold a reference on it.
 * @lladdr:    new link-layer address, or NULL if none is supplied.
 * @new:       requested NUD state.
 * @flags:     combination of
 *	NEIGH_UPDATE_F_OVERRIDE          allow replacing an existing, different
 *	                                 lladdr.
 *	NEIGH_UPDATE_F_WEAK_OVERRIDE     if the existing "connected" lladdr
 *	                                 differs, keep it but mark the entry
 *	                                 suspect (NUD_STALE) instead of
 *	                                 overriding it.
 *	NEIGH_UPDATE_F_ADMIN             the change is administrative.
 *	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allow the NTF_ROUTER flag to be
 *	                                 rewritten.
 *	NEIGH_UPDATE_F_ISROUTER          the neighbour is known to be a router.
 * @nlmsg_pid: forwarded to neigh_update_notify() when a notification is sent.
 *
 * Returns 0 on success (including "nothing to change"), -EPERM when a
 * non-administrative caller touches a NUD_NOARP/NUD_PERMANENT entry or the
 * entry is dead, and -EINVAL when no address is supplied for an entry that
 * has no valid one yet.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	struct net_device *dev;
	int update_isrouter = 0;
	int notify = 0;
	int err = -EPERM;
	u8 old;

	write_lock_bh(&neigh->lock);

	dev = neigh->dev;
	old = neigh->nud_state;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	/* Moving to a non-valid state: no address handling is needed. */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/*
	 * Normalise @lladdr: from here on "lladdr == neigh->ha" means
	 * "address unchanged".
	 */
	if (!dev->addr_len) {
		/* Device needs no address. */
		lladdr = neigh->ha;
	} else if (!lladdr) {
		/* Nothing supplied: reuse what we know, or reject. */
		if (!(old & NUD_VALID)) {
			err = -EINVAL;
			goto out;
		}
		lladdr = neigh->ha;
	} else if ((old & NUD_VALID) &&
		   !memcmp(lladdr, neigh->ha, dev->addr_len)) {
		/* Proposed address equals the cached one. */
		lladdr = neigh->ha;
	}

	/* If the entry was valid and the address is not changed, do not
	 * change the entry state when the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if (!((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			      (old & NUD_CONNECTED)))
				goto out;
			lladdr = neigh->ha;
			new = NUD_STALE;
		} else if (lladdr == neigh->ha && new == NUD_STALE &&
			   !(flags & NEIGH_UPDATE_F_ADMIN)) {
			new = old;
		}
	}

	/* Update timestamps only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk moving the locktime window with
	 * no-op updates and ignoring relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha) {
		if (new & NUD_CONNECTED)
			neigh->confirmed = jiffies;
		neigh->updated = jiffies;
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER) {
			unsigned long expires = jiffies;

			if (new & NUD_REACHABLE)
				expires += neigh->parms->reachable_time;
			neigh_add_timer(neigh, expires);
		}
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Back-date the confirmation by twice BASE_REACHABLE_TIME. */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				(NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}

	if (new == old)
		goto out;

	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);

	if (!(old & NUD_VALID)) {
		struct sk_buff *pending;

		/* Again: avoid dead loop if something went wrong */
		while ((neigh->nud_state & NUD_VALID) &&
		       (pending = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *route = skb_dst(pending);
			struct neighbour *looked_up = NULL;
			struct neighbour *target = neigh;

			/* The output path runs without neigh->lock held. */
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			if (route) {
				looked_up = dst_neigh_lookup_skb(route, pending);
				if (looked_up)
					target = looked_up;
			}
			target->output(target, pending);
			if (looked_up)
				neigh_release(looked_up);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		if (flags & NEIGH_UPDATE_F_ISROUTER)
			neigh->flags = neigh->flags | NTF_ROUTER;
		else
			neigh->flags = neigh->flags & ~NTF_ROUTER;
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}
1243
EXPORT_SYMBOL(neigh_update);
L
Linus Torvalds 已提交
1244

J
Jiri Benc 已提交
1245 1246 1247 1248 1249
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
1250 1251
	if (neigh->dead)
		return;
J
Jiri Benc 已提交
1252 1253 1254
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
1255 1256
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
J
Jiri Benc 已提交
1257 1258 1259 1260 1261
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

L
Linus Torvalds 已提交
1262 1263 1264 1265 1266 1267 1268
/*
 * Look up the entry for @saddr on @dev and, if there is one, feed it
 * @lladdr as a NUD_STALE update with NEIGH_UPDATE_F_OVERRIDE.
 *
 * The last argument to __neigh_lookup() is non-zero only when an address was
 * supplied or the device uses no link-layer address.
 *
 * Returns whatever __neigh_lookup() returned (possibly NULL).
 */
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	int have_addr = lladdr || !dev->addr_len;
	struct neighbour *neigh;

	neigh = __neigh_lookup(tbl, saddr, dev, have_addr);
	if (!neigh)
		return NULL;

	neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
1273
EXPORT_SYMBOL(neigh_event_ns);
L
Linus Torvalds 已提交
1274

E
Eric Dumazet 已提交
1275
/* called with read_lock_bh(&n->lock); */
1276
static void neigh_hh_init(struct neighbour *n)
L
Linus Torvalds 已提交
1277
{
1278 1279
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
1280
	struct hh_cache	*hh = &n->hh;
1281 1282

	write_lock_bh(&n->lock);
E
Eric Dumazet 已提交
1283

1284 1285 1286
	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
1287 1288
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);
E
Eric Dumazet 已提交
1289

1290
	write_unlock_bh(&n->lock);
L
Linus Torvalds 已提交
1291 1292 1293 1294
}

/* Slow and careful. */

1295
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
1296 1297 1298 1299 1300 1301
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
1302
		unsigned int seq;
E
Eric Dumazet 已提交
1303

1304
		if (dev->header_ops->cache && !neigh->hh.hh_len)
1305
			neigh_hh_init(neigh);
E
Eric Dumazet 已提交
1306

1307
		do {
1308
			__skb_pull(skb, skb_network_offset(skb));
1309 1310 1311 1312
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));
E
Eric Dumazet 已提交
1313

L
Linus Torvalds 已提交
1314
		if (err >= 0)
1315
			rc = dev_queue_xmit(skb);
L
Linus Torvalds 已提交
1316 1317 1318 1319 1320 1321 1322 1323 1324 1325
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
1326
EXPORT_SYMBOL(neigh_resolve_output);
L
Linus Torvalds 已提交
1327 1328 1329

/* As fast as possible without hh cache */

1330
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
L
Linus Torvalds 已提交
1331 1332
{
	struct net_device *dev = neigh->dev;
1333
	unsigned int seq;
1334
	int err;
L
Linus Torvalds 已提交
1335

1336
	do {
1337
		__skb_pull(skb, skb_network_offset(skb));
1338 1339 1340 1341 1342
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

L
Linus Torvalds 已提交
1343
	if (err >= 0)
1344
		err = dev_queue_xmit(skb);
L
Linus Torvalds 已提交
1345 1346 1347 1348 1349 1350
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
1351
EXPORT_SYMBOL(neigh_connected_output);
L
Linus Torvalds 已提交
1352

1353 1354 1355 1356 1357 1358
/*
 * Output method that hands @skb straight to dev_queue_xmit() without adding
 * a header here; @neigh is not used.  Returns dev_queue_xmit()'s result.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = dev_queue_xmit(skb);

	return rc;
}
EXPORT_SYMBOL(neigh_direct_output);

L
Linus Torvalds 已提交
1359 1360 1361 1362 1363
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
1364
	struct sk_buff *skb, *n;
L
Linus Torvalds 已提交
1365 1366 1367

	spin_lock(&tbl->proxy_queue.lock);

1368 1369
	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;
L
Linus Torvalds 已提交
1370 1371

		if (tdif <= 0) {
1372
			struct net_device *dev = skb->dev;
1373

1374
			__skb_unlink(skb, &tbl->proxy_queue);
1375 1376
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
1377
				tbl->proxy_redo(skb);
1378 1379
				rcu_read_unlock();
			} else {
1380
				kfree_skb(skb);
1381
			}
L
Linus Torvalds 已提交
1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
1397 1398

	unsigned long sched_next = now + (prandom_u32() %
J
Jiri Pirko 已提交
1399
					  NEIGH_VAR(p, PROXY_DELAY));
L
Linus Torvalds 已提交
1400

J
Jiri Pirko 已提交
1401
	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
L
Linus Torvalds 已提交
1402 1403 1404
		kfree_skb(skb);
		return;
	}
1405 1406 1407

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
L
Linus Torvalds 已提交
1408 1409 1410 1411 1412 1413

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
E
Eric Dumazet 已提交
1414
	skb_dst_drop(skb);
L
Linus Torvalds 已提交
1415 1416 1417 1418 1419
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1420
EXPORT_SYMBOL(pneigh_enqueue);
L
Linus Torvalds 已提交
1421

1422
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1423 1424 1425 1426
						      struct net *net, int ifindex)
{
	struct neigh_parms *p;

1427
	list_for_each_entry(p, &tbl->parms_list, list) {
1428
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1429
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
1430 1431 1432 1433 1434
			return p;
	}

	return NULL;
}
L
Linus Torvalds 已提交
1435 1436 1437 1438

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
1439
	struct neigh_parms *p;
1440 1441
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;
1442

1443
	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
L
Linus Torvalds 已提交
1444 1445 1446 1447
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
J
Jiri Pirko 已提交
1448
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1449 1450
		dev_hold(dev);
		p->dev = dev;
1451
		write_pnet(&p->net, net);
1452
		p->sysctl_table = NULL;
1453

1454
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1455
			dev_put(dev);
1456 1457
			kfree(p);
			return NULL;
L
Linus Torvalds 已提交
1458
		}
1459

L
Linus Torvalds 已提交
1460
		write_lock_bh(&tbl->lock);
1461
		list_add(&p->list, &tbl->parms.list);
L
Linus Torvalds 已提交
1462
		write_unlock_bh(&tbl->lock);
1463 1464

		neigh_parms_data_state_cleanall(p);
L
Linus Torvalds 已提交
1465 1466 1467
	}
	return p;
}
1468
EXPORT_SYMBOL(neigh_parms_alloc);
L
Linus Torvalds 已提交
1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482

/*
 * RCU callback queued by neigh_parms_release(): recover the neigh_parms that
 * embeds @head and drop a reference on it with neigh_parms_put().
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	neigh_parms_put(container_of(head, struct neigh_parms, rcu_head));
}

/*
 * Unlink @parms from @tbl and mark it dead, drop its device reference, and
 * defer the final neigh_parms_put() to an RCU callback.
 *
 * NULL and the table's own default parms (&tbl->parms) are ignored.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms)
		return;
	if (parms == &tbl->parms)
		return;

	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);

	if (parms->dev)
		dev_put(parms->dev);

	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
1490
EXPORT_SYMBOL(neigh_parms_release);
L
Linus Torvalds 已提交
1491

1492
/* Free the memory of @parms; nothing else is torn down here. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

1497 1498
/* Lock class key that neigh_table_init() passes to
 * skb_queue_head_init_class() for each table's proxy_queue.
 */
static struct lock_class_key neigh_table_proxy_queue_class;

1499 1500 1501
/* Registered tables, indexed by the NEIGH_*_TABLE constants: a slot is set
 * by neigh_table_init() and reset to NULL by neigh_table_clear().
 */
static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
L
Linus Torvalds 已提交
1502 1503 1504 1505
{
	unsigned long now = jiffies;
	unsigned long phsize;

1506 1507
	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
E
Eric Dumazet 已提交
1508
	write_pnet(&tbl->parms.net, &init_net);
L
Linus Torvalds 已提交
1509 1510
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
J
Jiri Pirko 已提交
1511
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
L
Linus Torvalds 已提交
1512 1513 1514 1515

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");
1516

L
Linus Torvalds 已提交
1517
#ifdef CONFIG_PROC_FS
1518 1519
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
L
Linus Torvalds 已提交
1520 1521 1522
		panic("cannot create neighbour proc dir entry");
#endif

1523
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
L
Linus Torvalds 已提交
1524 1525

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
A
Andrew Morton 已提交
1526
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
L
Linus Torvalds 已提交
1527

1528
	if (!tbl->nht || !tbl->phash_buckets)
L
Linus Torvalds 已提交
1529 1530
		panic("cannot allocate neighbour cache hashes");

1531 1532 1533 1534 1535 1536
	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

L
Linus Torvalds 已提交
1537
	rwlock_init(&tbl->lock);
1538
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1539 1540
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
1541
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1542 1543
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);
L
Linus Torvalds 已提交
1544 1545 1546

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1547

1548
	neigh_tables[index] = tbl;
L
Linus Torvalds 已提交
1549
}
1550
EXPORT_SYMBOL(neigh_table_init);
L
Linus Torvalds 已提交
1551

1552
int neigh_table_clear(int index, struct neigh_table *tbl)
L
Linus Torvalds 已提交
1553
{
1554
	neigh_tables[index] = NULL;
L
Linus Torvalds 已提交
1555
	/* It is not clean... Fix it to unload IPv6 module safely */
1556
	cancel_delayed_work_sync(&tbl->gc_work);
L
Linus Torvalds 已提交
1557 1558 1559 1560
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
J
Joe Perches 已提交
1561
		pr_crit("neighbour leakage\n");
L
Linus Torvalds 已提交
1562

E
Eric Dumazet 已提交
1563 1564
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
1565
	tbl->nht = NULL;
L
Linus Torvalds 已提交
1566 1567 1568 1569

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

1570 1571
	remove_proc_entry(tbl->id, init_net.proc_net_stat);

1572 1573 1574
	free_percpu(tbl->stats);
	tbl->stats = NULL;

L
Linus Torvalds 已提交
1575 1576
	return 0;
}
1577
EXPORT_SYMBOL(neigh_table_clear);
L
Linus Torvalds 已提交
1578

1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597
/*
 * Map an address family to its registered neighbour table.
 * Returns NULL for families without a slot, or whose slot is empty.
 */
static struct neigh_table *neigh_find_table(int family)
{
	switch (family) {
	case AF_INET:
		return neigh_tables[NEIGH_ARP_TABLE];
	case AF_INET6:
		return neigh_tables[NEIGH_ND_TABLE];
	case AF_DECnet:
		return neigh_tables[NEIGH_DN_TABLE];
	}

	return NULL;
}

1598 1599
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
1600
{
1601
	struct net *net = sock_net(skb->sk);
1602 1603
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
L
Linus Torvalds 已提交
1604
	struct neigh_table *tbl;
1605
	struct neighbour *neigh;
L
Linus Torvalds 已提交
1606
	struct net_device *dev = NULL;
1607
	int err = -EINVAL;
L
Linus Torvalds 已提交
1608

1609
	ASSERT_RTNL();
1610
	if (nlmsg_len(nlh) < sizeof(*ndm))
L
Linus Torvalds 已提交
1611 1612
		goto out;

1613 1614 1615 1616 1617 1618
	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
1619
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1620 1621 1622 1623 1624 1625
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

1626 1627 1628
	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;
L
Linus Torvalds 已提交
1629

1630 1631
	if (nla_len(dst_attr) < tbl->key_len)
		goto out;
L
Linus Torvalds 已提交
1632

1633 1634 1635 1636
	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}
L
Linus Torvalds 已提交
1637

1638 1639
	if (dev == NULL)
		goto out;
1640

1641 1642 1643
	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
1644
		goto out;
L
Linus Torvalds 已提交
1645
	}
1646 1647 1648

	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
1649 1650
			   NEIGH_UPDATE_F_ADMIN,
			   NETLINK_CB(skb).portid);
1651
	neigh_release(neigh);
1652

L
Linus Torvalds 已提交
1653 1654 1655 1656
out:
	return err;
}

1657 1658
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
1659
{
1660
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1661
	struct net *net = sock_net(skb->sk);
1662 1663
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
L
Linus Torvalds 已提交
1664 1665
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
1666 1667
	struct neighbour *neigh;
	void *dst, *lladdr;
1668
	int err;
L
Linus Torvalds 已提交
1669

1670
	ASSERT_RTNL();
1671
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
1672
	if (err < 0)
L
Linus Torvalds 已提交
1673 1674
		goto out;

1675 1676 1677 1678 1679 1680
	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
1681
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1682 1683 1684 1685 1686 1687
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1688
			goto out;
1689 1690
	}

1691 1692 1693
	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;
L
Linus Torvalds 已提交
1694

1695 1696 1697 1698
	if (nla_len(tb[NDA_DST]) < tbl->key_len)
		goto out;
	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
L
Linus Torvalds 已提交
1699

1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710
	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			err = 0;
		}
		goto out;
	}
L
Linus Torvalds 已提交
1711

1712 1713
	if (dev == NULL)
		goto out;
1714

1715 1716 1717 1718
	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
1719
			goto out;
L
Linus Torvalds 已提交
1720 1721
		}

1722 1723 1724 1725 1726 1727 1728 1729 1730
		neigh = __neigh_lookup_errno(tbl, dst, dev);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
1731
			goto out;
1732
		}
L
Linus Torvalds 已提交
1733

1734 1735
		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
L
Linus Torvalds 已提交
1736 1737
	}

1738 1739 1740 1741
	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
1742 1743
		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				   NETLINK_CB(skb).portid);
1744 1745
	neigh_release(neigh);

L
Linus Torvalds 已提交
1746 1747 1748 1749
out:
	return err;
}

1750 1751
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
1752 1753 1754 1755 1756
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;
1757

1758 1759 1760
	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
J
Jiri Pirko 已提交
1761 1762
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1763 1764
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
J
Jiri Pirko 已提交
1765 1766 1767 1768 1769 1770 1771
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
1772 1773
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
1774 1775
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
1776
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1777
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
J
Jiri Pirko 已提交
1778
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
1779
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
1780
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1781
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
J
Jiri Pirko 已提交
1782
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1783
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
J
Jiri Pirko 已提交
1784
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1785
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
J
Jiri Pirko 已提交
1786
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1787
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
J
Jiri Pirko 已提交
1788
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
1789
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
1790
		goto nla_put_failure;
1791
	return nla_nest_end(skb, nest);
1792

1793
nla_put_failure:
1794 1795
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
1796 1797
}

1798 1799
/*
 * Build one netlink message describing @tbl: name, GC settings, an
 * NDTA_CONFIG snapshot, NDTA_STATS summed over all possible CPUs, and the
 * table's default parms.
 *
 * tbl->lock is held for reading while the message is filled in.
 * Returns 0 on success, or -EMSGSIZE (with the message cancelled if it was
 * started) when @skb has no room.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (!nlh)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1 = 0;
	ndtmsg->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;

	/* NDTA_CONFIG: sizes, entry count, ages and hash parameters. */
	{
		unsigned long now = jiffies;
		unsigned int since_flush = now - tbl->last_flush;
		unsigned int since_rand = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config cfg = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(since_flush),
			.ndtc_last_rand		= jiffies_to_msecs(since_rand),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		cfg.ndtc_hash_rnd = nht->hash_rnd[0];
		cfg.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(cfg), &cfg))
			goto nla_put_failure;
	}

	/* NDTA_STATS: per-cpu counters summed up. */
	{
		struct ndt_stats totals;
		int cpu;

		memset(&totals, 0, sizeof(totals));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics *pcpu = per_cpu_ptr(tbl->stats, cpu);

			totals.ndts_allocs		+= pcpu->allocs;
			totals.ndts_destroys		+= pcpu->destroys;
			totals.ndts_hash_grows		+= pcpu->hash_grows;
			totals.ndts_res_failed		+= pcpu->res_failed;
			totals.ndts_lookups		+= pcpu->lookups;
			totals.ndts_hits		+= pcpu->hits;
			totals.ndts_rcv_probes_mcast	+= pcpu->rcv_probes_mcast;
			totals.ndts_rcv_probes_ucast	+= pcpu->rcv_probes_ucast;
			totals.ndts_periodic_gc_runs	+= pcpu->periodic_gc_runs;
			totals.ndts_forced_gc_runs	+= pcpu->forced_gc_runs;
			totals.ndts_table_fulls		+= pcpu->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(totals), &totals,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	/* The table-wide defaults must not be bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

1887 1888
/*
 * Build one netlink message carrying only the table name and the given
 * @parms (as an NDTA_PARMS nest).
 *
 * tbl->lock is held for reading while the message is filled in.
 * Returns 0 on success, or -EMSGSIZE (with the message cancelled if it was
 * started) when @skb has no room.
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *hdr;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	hdr->ndtm_family = tbl->family;
	hdr->ndtm_pad1 = 0;
	hdr->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0)
		goto errout;
	if (neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1919

1920
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1921 1922 1923 1924 1925 1926 1927 1928
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

1929
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1930 1931 1932 1933 1934 1935
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
1936
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
1937 1938 1939 1940 1941 1942 1943 1944 1945
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};

1946 1947
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
1948
{
1949
	struct net *net = sock_net(skb->sk);
1950
	struct neigh_table *tbl;
1951 1952
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
1953 1954
	bool found = false;
	int err, tidx;
1955

1956
	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1957
			  nl_neightbl_policy, extack);
1958 1959
	if (err < 0)
		goto errout;
1960

1961 1962 1963 1964 1965 1966
	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
1967 1968 1969 1970 1971

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
1972 1973
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
1974 1975
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
1976
			break;
1977
		}
1978 1979
	}

1980 1981
	if (!found)
		return -ENOENT;
1982

1983
	/*
1984 1985 1986 1987 1988
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

1989 1990
	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
1991
		struct neigh_parms *p;
1992
		int i, ifindex = 0;
1993

1994
		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1995
				       nl_ntbl_parm_policy, extack);
1996 1997
		if (err < 0)
			goto errout_tbl_lock;
1998

1999 2000
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2001

2002
		p = lookup_neigh_parms(tbl, net, ifindex);
2003 2004
		if (p == NULL) {
			err = -ENOENT;
2005
			goto errout_tbl_lock;
2006 2007
		}

2008 2009 2010
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;
2011

2012 2013
			switch (i) {
			case NDTPA_QUEUE_LEN:
J
Jiri Pirko 已提交
2014 2015 2016
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
E
Eric Dumazet 已提交
2017 2018
				break;
			case NDTPA_QUEUE_LENBYTES:
J
Jiri Pirko 已提交
2019 2020
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
2021 2022
				break;
			case NDTPA_PROXY_QLEN:
J
Jiri Pirko 已提交
2023 2024
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
2025 2026
				break;
			case NDTPA_APP_PROBES:
J
Jiri Pirko 已提交
2027 2028
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
2029 2030
				break;
			case NDTPA_UCAST_PROBES:
J
Jiri Pirko 已提交
2031 2032
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
2033 2034
				break;
			case NDTPA_MCAST_PROBES:
J
Jiri Pirko 已提交
2035 2036
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
2037
				break;
2038 2039 2040 2041
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
2042
			case NDTPA_BASE_REACHABLE_TIME:
J
Jiri Pirko 已提交
2043 2044
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
2045 2046 2047 2048 2049 2050
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2051 2052
				break;
			case NDTPA_GC_STALETIME:
J
Jiri Pirko 已提交
2053 2054
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
2055 2056
				break;
			case NDTPA_DELAY_PROBE_TIME:
J
Jiri Pirko 已提交
2057 2058
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
2059
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2060 2061
				break;
			case NDTPA_RETRANS_TIME:
J
Jiri Pirko 已提交
2062 2063
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
2064 2065
				break;
			case NDTPA_ANYCAST_DELAY:
2066 2067
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
2068 2069
				break;
			case NDTPA_PROXY_DELAY:
2070 2071
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
2072 2073
				break;
			case NDTPA_LOCKTIME:
2074 2075
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
2076 2077 2078 2079
				break;
			}
		}
	}
2080

2081 2082 2083 2084 2085 2086
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

2087 2088
	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2089

2090 2091
	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2092

2093 2094
	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2095

2096 2097
	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2098 2099 2100

	err = 0;

2101
errout_tbl_lock:
2102
	write_unlock_bh(&tbl->lock);
2103
errout:
2104 2105 2106
	return err;
}

2107
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2108
{
2109
	struct net *net = sock_net(skb->sk);
2110 2111 2112
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
2113 2114
	struct neigh_table *tbl;

2115
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2116

2117
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2118 2119
		struct neigh_parms *p;

2120 2121 2122 2123
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

2124
		if (tidx < tbl_skip || (family && tbl->family != family))
2125 2126
			continue;

2127
		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2128
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2129
				       NLM_F_MULTI) < 0)
2130 2131
			break;

2132 2133 2134
		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
2135
			if (!net_eq(neigh_parms_net(p), net))
2136 2137
				continue;

2138 2139
			if (nidx < neigh_skip)
				goto next;
2140

2141
			if (neightbl_fill_param_info(skb, tbl, p,
2142
						     NETLINK_CB(cb->skb).portid,
2143 2144
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
2145
						     NLM_F_MULTI) < 0)
2146
				goto out;
2147 2148
		next:
			nidx++;
2149 2150
		}

2151
		neigh_skip = 0;
2152 2153
	}
out:
2154 2155
	cb->args[0] = tidx;
	cb->args[1] = nidx;
2156 2157 2158

	return skb->len;
}
L
Linus Torvalds 已提交
2159

2160 2161
/*
 * Append one netlink message describing @neigh to @skb.
 *
 * The message consists of a struct ndmsg header followed by NDA_DST, then
 * NDA_LLADDR (only when the entry is in a NUD_VALID state), NDA_PROBES and
 * NDA_CACHEINFO.  State, link-layer address and cache timestamps are sampled
 * with neigh->lock held for reading.
 *
 * Returns 0 on success, or -EMSGSIZE if the header or an attribute did not
 * fit; a partially built message is cancelled.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;
	bool lladdr_failed = false;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (!nlh)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = neigh->ops->family;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = neigh->flags;
	ndm->ndm_type = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto cancel;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		lladdr_failed = nla_put(skb, NDA_LLADDR,
					neigh->dev->addr_len, haddr) < 0;
	}

	if (!lladdr_failed) {
		/* ages relative to the jiffies value sampled on entry */
		ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
		ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
		ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
		ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
	}
	read_unlock_bh(&neigh->lock);

	if (lladdr_failed)
		goto cancel;

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto cancel;

	nlmsg_end(skb, nlh);
	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228
/*
 * Append one netlink message describing the proxy entry @pn of @tbl to @skb.
 *
 * The header always reports NTF_PROXY in addition to pn->flags, type
 * RTN_UNICAST, state NUD_NONE, and ifindex 0 when the entry has no device.
 * The only attribute is NDA_DST holding the entry's key.
 *
 * Returns 0 on success or -EMSGSIZE if the message did not fit.
 */
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (!nlh)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = tbl->family;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = pn->flags | NTF_PROXY;
	ndm->ndm_type = RTN_UNICAST;
	if (pn->dev)
		ndm->ndm_ifindex = pn->dev->ifindex;
	else
		ndm->ndm_ifindex = 0;
	ndm->ndm_state = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

2244
/*
 * Announce a change of @neigh: first run the NETEVENT_NEIGH_UPDATE notifier
 * chain, then emit an RTM_NEWNEIGH message through __neigh_notify() with no
 * extra netlink flags and @nlmsg_pid as the port id.
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	const int nl_flags = 0;

	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, nl_flags, nlmsg_pid);
}
L
Linus Torvalds 已提交
2249

2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263
/*
 * Dump filter on the master device.  Returns true when @dev must be skipped:
 * a filter is set (@master_idx != 0) and @dev either has no master upper
 * device or that master's ifindex differs from @master_idx.
 */
static bool neigh_master_filtered(struct net_device *dev, int master_idx)
{
	struct net_device *upper;

	if (master_idx == 0)
		return false;

	upper = netdev_master_upper_dev_get(dev);
	return !upper || upper->ifindex != master_idx;
}

2264 2265 2266 2267 2268 2269 2270 2271
/*
 * Dump filter on the device itself.  Returns true when a filter is set
 * (@filter_idx != 0) and @dev's ifindex does not match it.
 */
static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
{
	return filter_idx && dev->ifindex != filter_idx;
}

L
Linus Torvalds 已提交
2272 2273 2274
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
2275
	struct net *net = sock_net(skb->sk);
2276 2277
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tb[NDA_MAX + 1];
L
Linus Torvalds 已提交
2278 2279 2280
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
2281
	struct neigh_hash_table *nht;
2282
	int filter_master_idx = 0, filter_idx = 0;
2283 2284 2285
	unsigned int flags = NLM_F_MULTI;
	int err;

2286
	err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
2287
	if (!err) {
2288 2289 2290
		if (tb[NDA_IFINDEX])
			filter_idx = nla_get_u32(tb[NDA_IFINDEX]);

2291 2292 2293
		if (tb[NDA_MASTER])
			filter_master_idx = nla_get_u32(tb[NDA_MASTER]);

2294
		if (filter_idx || filter_master_idx)
2295 2296
			flags |= NLM_F_DUMP_FILTERED;
	}
L
Linus Torvalds 已提交
2297

2298 2299 2300
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

2301
	for (h = s_h; h < (1 << nht->hash_shift); h++) {
L
Linus Torvalds 已提交
2302 2303
		if (h > s_h)
			s_idx = 0;
2304 2305 2306
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
2307 2308 2309 2310
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter_idx) ||
			    neigh_master_filtered(n->dev, filter_master_idx))
2311
				goto next;
2312
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
L
Linus Torvalds 已提交
2313
					    cb->nlh->nlmsg_seq,
2314
					    RTM_NEWNEIGH,
2315
					    flags) < 0) {
L
Linus Torvalds 已提交
2316 2317 2318
				rc = -1;
				goto out;
			}
2319
next:
2320
			idx++;
L
Linus Torvalds 已提交
2321 2322 2323 2324
		}
	}
	rc = skb->len;
out:
2325
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
2326 2327 2328 2329 2330
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}

2331 2332 2333 2334 2335 2336 2337 2338 2339 2340
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

2341
	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2342 2343 2344
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2345
			if (idx < s_idx || pneigh_net(n) != net)
2346
				goto next;
2347
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2348 2349
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
2350
					    NLM_F_MULTI, tbl) < 0) {
2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}

2369
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
L
Linus Torvalds 已提交
2370 2371 2372
{
	struct neigh_table *tbl;
	int t, family, s_t;
2373
	int proxy = 0;
2374
	int err;
L
Linus Torvalds 已提交
2375

2376
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2377 2378 2379 2380 2381 2382 2383 2384

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

L
Linus Torvalds 已提交
2385 2386
	s_t = cb->args[0];

2387 2388 2389 2390 2391
	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
L
Linus Torvalds 已提交
2392 2393 2394 2395 2396
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
2397 2398 2399 2400
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
2401 2402
		if (err < 0)
			break;
L
Linus Torvalds 已提交
2403 2404 2405 2406 2407 2408 2409 2410 2411
	}

	cb->args[0] = t;
	return skb->len;
}

void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
2412
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
2413

2414 2415 2416
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

2417
	read_lock(&tbl->lock); /* avoid resizes */
2418
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
L
Linus Torvalds 已提交
2419 2420
		struct neighbour *n;

2421 2422 2423
		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
L
Linus Torvalds 已提交
2424 2425
			cb(n, cookie);
	}
2426 2427
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
2428 2429 2430 2431 2432 2433 2434 2435
}
EXPORT_SYMBOL(neigh_for_each);

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
2436
	struct neigh_hash_table *nht;
L
Linus Torvalds 已提交
2437

2438 2439
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
2440
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2441 2442
		struct neighbour *n;
		struct neighbour __rcu **np;
L
Linus Torvalds 已提交
2443

2444
		np = &nht->hash_buckets[chain];
2445 2446
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
L
Linus Torvalds 已提交
2447 2448 2449 2450 2451
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
2452 2453 2454
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
L
Linus Torvalds 已提交
2455 2456 2457 2458
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
2459 2460
			if (release)
				neigh_cleanup_and_release(n);
L
Linus Torvalds 已提交
2461 2462 2463 2464 2465
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);

2466
int neigh_xmit(int index, struct net_device *dev,
2467 2468
	       const void *addr, struct sk_buff *skb)
{
2469 2470
	int err = -EAFNOSUPPORT;
	if (likely(index < NEIGH_NR_TABLES)) {
2471 2472 2473
		struct neigh_table *tbl;
		struct neighbour *neigh;

2474
		tbl = neigh_tables[index];
2475 2476
		if (!tbl)
			goto out;
2477
		rcu_read_lock_bh();
2478 2479 2480 2481
		neigh = __neigh_lookup_noref(tbl, addr, dev);
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
2482 2483
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
2484
			goto out_kfree_skb;
2485
		}
2486
		err = neigh->output(neigh, skb);
2487
		rcu_read_unlock_bh();
2488
	}
2489 2490 2491 2492 2493 2494 2495
	else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
2496 2497 2498 2499 2500 2501 2502 2503
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);

L
Linus Torvalds 已提交
2504 2505 2506 2507 2508
#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
2509
	struct net *net = seq_file_net(seq);
2510
	struct neigh_hash_table *nht = state->nht;
L
Linus Torvalds 已提交
2511 2512 2513 2514
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2515
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2516
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
L
Linus Torvalds 已提交
2517 2518

		while (n) {
2519
			if (!net_eq(dev_net(n->dev), net))
2520
				goto next;
L
Linus Torvalds 已提交
2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
2533 2534
next:
			n = rcu_dereference_bh(n->next);
L
Linus Torvalds 已提交
2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

/*
 * seq_file helper: return the acceptable entry following @n (same acceptance
 * rules as neigh_get_first()), continuing into later hash buckets as needed
 * and advancing state->bucket accordingly.  Returns NULL when the table is
 * exhausted.
 *
 * If the neigh_sub_iter hook is set it is consulted first for @n itself and
 * later for each candidate; whenever it returns non-NULL the entry passed to
 * it is returned immediately and *@pos is left untouched by this function.
 * Otherwise, when an entry is found and @pos is non-NULL, *@pos is
 * decremented.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter && state->neigh_sub_iter(state, n, pos))
		return n;

	n = rcu_dereference_bh(n->next);

	for (;;) {
		for (; n; n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;

			if (state->neigh_sub_iter) {
				if (state->neigh_sub_iter(state, n, pos))
					return n;
				continue;
			}

			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP) ||
			    (n->nud_state & ~NUD_NOARP))
				goto found;
		}

		/* chain exhausted: move on to the next bucket, if any */
		if (++state->bucket >= (1 << nht->hash_shift))
			return NULL;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

found:
	if (pos)
		--(*pos);
	return n;
}

/*
 * seq_file helper: position the iterator on a neighbour entry by starting at
 * neigh_get_first() and stepping with neigh_get_next() until *@pos has been
 * counted down to zero.  Returns the entry reached, or NULL when the entries
 * ran out first; in that case *@pos holds the remaining count.
 */
static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (!n)
		return NULL;

	--(*pos);
	while (*pos && n)
		n = neigh_get_next(seq, n, pos);

	return *pos ? NULL : n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
2611
	struct net *net = seq_file_net(seq);
L
Linus Torvalds 已提交
2612 2613 2614 2615 2616 2617 2618
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
2619
		while (pn && !net_eq(pneigh_net(pn), net))
2620
			pn = pn->next;
L
Linus Torvalds 已提交
2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

/*
 * seq_file helper: return the proxy entry following @pn that belongs to the
 * file's network namespace, continuing into later buckets as needed and
 * advancing state->bucket accordingly.  Returns NULL when all buckets are
 * exhausted.  When an entry is found and @pos is non-NULL, *@pos is
 * decremented.
 */
static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	pn = pn->next;
	for (;;) {
		/* skip entries that belong to other namespaces */
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;

		if (++state->bucket > PNEIGH_HASHMASK)
			return NULL;
		pn = tbl->phash_buckets[state->bucket];
	}

	if (pos)
		--(*pos);

	return pn;
}

/*
 * seq_file helper: position the iterator on a proxy entry by starting at
 * pneigh_get_first() and stepping with pneigh_get_next() until *@pos has
 * been counted down to zero.  Returns the entry reached, or NULL when the
 * entries ran out first.
 */
static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (!pn)
		return NULL;

	--(*pos);
	while (*pos && pn)
		pn = pneigh_get_next(seq, pn, pos);

	return *pos ? NULL : pn;
}

/*
 * seq_file helper: find the entry at position *@pos, looking through the
 * regular neighbour entries first and, unless NEIGH_SEQ_NEIGH_ONLY is set,
 * continuing with the proxy entries using whatever count is left over.
 * Works on a private copy of the position; *@pos itself is not modified.
 */
static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	loff_t remaining = *pos;
	void *entry;

	entry = neigh_get_idx(seq, &remaining);
	if (entry)
		return entry;

	if (state->flags & NEIGH_SEQ_NEIGH_ONLY)
		return NULL;

	return pneigh_get_idx(seq, &remaining);
}

/*
 * seq_file ->start helper for neighbour tables.
 *
 * Initialises the iterator state for @tbl (NEIGH_SEQ_IS_PNEIGH is masked out
 * of @neigh_seq_flags), enters an RCU-bh read-side section and caches the
 * table's current hash table in the state.  Returns SEQ_START_TOKEN for
 * position 0 and otherwise the entry found by neigh_get_idx_any().
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	if (!*pos)
		return SEQ_START_TOKEN;

	return neigh_get_idx_any(seq, pos);
}
EXPORT_SYMBOL(neigh_seq_start);

/*
 * seq_file ->next helper for neighbour tables.
 *
 * After the start token the first regular entry is returned.  While the
 * iterator is on regular entries the next one is returned; once they run out
 * the walk switches to the first proxy entry unless NEIGH_SEQ_NEIGH_ONLY is
 * set.  While on proxy entries the next proxy entry is returned.  *@pos is
 * always incremented.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
	} else if (state->flags & NEIGH_SEQ_IS_PNEIGH) {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	} else {
		rc = neigh_get_next(seq, v, NULL);
		if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	}

	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
2729
	__releases(rcu_bh)
L
Linus Torvalds 已提交
2730
{
2731
	rcu_read_unlock_bh();
L
Linus Torvalds 已提交
2732 2733 2734 2735 2736 2737 2738
}
EXPORT_SYMBOL(neigh_seq_stop);

/* statistics via seq_file */

/*
 * ->start for the per-cpu statistics file.  Position 0 yields the start
 * token; position N > 0 yields the statistics of the first possible CPU
 * with id >= N - 1 and sets *@pos to that CPU's id plus one.  Returns NULL
 * when no such CPU exists.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	cpu = *pos - 1;
	while (cpu < nr_cpu_ids) {
		if (cpu_possible(cpu)) {
			*pos = cpu + 1;
			return per_cpu_ptr(tbl->stats, cpu);
		}
		++cpu;
	}
	return NULL;
}

/*
 * ->next for the per-cpu statistics file: yields the statistics of the first
 * possible CPU with id >= *@pos and sets *@pos to that CPU's id plus one.
 * Returns NULL when no such CPU exists.  @v is not used.
 */
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu = *pos;

	while (cpu < nr_cpu_ids) {
		if (cpu_possible(cpu)) {
			*pos = cpu + 1;
			return per_cpu_ptr(tbl->stats, cpu);
		}
		++cpu;
	}
	return NULL;
}

/* ->stop for the per-cpu statistics file: intentionally does nothing. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
A
Alexey Dobriyan 已提交
2775
	struct neigh_table *tbl = seq->private;
L
Linus Torvalds 已提交
2776 2777 2778
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
2779
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
L
Linus Torvalds 已提交
2780 2781 2782 2783
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2784
			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
L
Linus Torvalds 已提交
2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
2800
		   st->forced_gc_runs,
2801 2802
		   st->unres_discards,
		   st->table_fulls
L
Linus Torvalds 已提交
2803 2804 2805 2806 2807
		   );

	return 0;
}

2808
static const struct seq_operations neigh_stat_seq_ops = {
L
Linus Torvalds 已提交
2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};

static int neigh_stat_seq_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &neigh_stat_seq_ops);

	if (!ret) {
		struct seq_file *sf = file->private_data;
A
Al Viro 已提交
2821
		sf->private = PDE_DATA(inode);
L
Linus Torvalds 已提交
2822 2823 2824 2825
	}
	return ret;
};

2826
static const struct file_operations neigh_stat_seq_fops = {
L
Linus Torvalds 已提交
2827 2828 2829 2830 2831 2832 2833 2834 2835
	.owner	 = THIS_MODULE,
	.open 	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#endif /* CONFIG_PROC_FS */

2836 2837 2838 2839 2840 2841 2842 2843 2844
/*
 * Buffer size requested by __neigh_notify() for one neighbour notification:
 * the aligned ndmsg header plus room for each attribute listed below, with
 * both address attributes sized for MAX_ADDR_LEN.
 */
static inline size_t neigh_nlmsg_size(void)
{
	size_t size = NLMSG_ALIGN(sizeof(struct ndmsg));

	size += nla_total_size(MAX_ADDR_LEN);		/* NDA_DST */
	size += nla_total_size(MAX_ADDR_LEN);		/* NDA_LLADDR */
	size += nla_total_size(sizeof(struct nda_cacheinfo));
	size += nla_total_size(4);			/* NDA_PROBES */

	return size;
}

2845 2846
/*
 * Build a netlink message of the given @type and @flags describing @n and
 * send it to the RTNLGRP_NEIGH group of the namespace of n->dev.  Uses
 * GFP_ATOMIC allocations.
 *
 * On failure (allocation: -ENOBUFS, or the neigh_fill_info() error) the
 * error is recorded for the group via rtnl_set_sk_err() instead.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (!skb) {
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, -ENOBUFS);
		return;
	}

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
		return;
	}

	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
}

2870
void neigh_app_ns(struct neighbour *n)
L
Linus Torvalds 已提交
2871
{
2872
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
2873
}
2874
EXPORT_SYMBOL(neigh_app_ns);
L
Linus Torvalds 已提交
2875 2876

#ifdef CONFIG_SYSCTL
2877
static int zero;
2878
static int int_max = INT_MAX;
2879
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
L
Linus Torvalds 已提交
2880

2881 2882
/*
 * sysctl handler that presents a byte-based queue limit as a packet count.
 *
 * The int behind ctl->data is divided by SKB_TRUESIZE(ETH_FRAME_LEN) for
 * reading; a successfully written value (range 0..unres_qlen_max) is
 * multiplied by the same factor before it is stored back.  Returns the
 * result of proc_dointvec_minmax().
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int *bytes = ctl->data;
	int size = *bytes / SKB_TRUESIZE(ETH_FRAME_LEN);
	int ret;

	/* operate on a scratch copy so that proc_dointvec_minmax() sees packets */
	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret)
		*bytes = size * SKB_TRUESIZE(ETH_FRAME_LEN);

	return ret;
}

2899 2900 2901
static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
2902 2903
	switch (family) {
	case AF_INET:
2904
		return __in_dev_arp_parms_get_rcu(dev);
2905 2906 2907
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931
	return NULL;
}

/*
 * Copy the value p->data[@index] into the parameter set (same address family
 * as @p) of every device in @net, skipping devices that have no parameter
 * set for the family or whose data_state bit for @index is already set.
 * Runs under rcu_read_lock().
 */
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	int family = neigh_parms_family(p);
	struct net_device *dev;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p;

		dst_p = neigh_get_dev_parms_rcu(dev, family);
		if (!dst_p || test_bit(index, dst_p->data_state))
			continue;

		dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}

static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
2932
	struct net *net = neigh_parms_net(p);
2933 2934 2935 2936 2937 2938
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
2939 2940
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2941 2942 2943 2944
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}

J
Jiri Pirko 已提交
2945 2946 2947 2948 2949
/*
 * sysctl handler for an int limited to the range 0..INT_MAX: runs
 * proc_dointvec_minmax() on a copy of @ctl carrying those bounds, then
 * neigh_proc_update() on the original entry.  Returns the
 * proc_dointvec_minmax() result.
 */
static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
					   void __user *buffer,
					   size_t *lenp, loff_t *ppos)
{
	struct ctl_table bounded = *ctl;
	int ret;

	/* extra1/extra2 of the original entry are left untouched */
	bounded.extra1 = &zero;
	bounded.extra2 = &int_max;
	ret = proc_dointvec_minmax(&bounded, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

J
Jiri Pirko 已提交
2960 2961 2962
/*
 * Neighbour sysctl handler for plain ints: proc_dointvec() followed by
 * neigh_proc_update().  Returns the proc_dointvec() result.
 */
int neigh_proc_dointvec(struct ctl_table *ctl, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);

	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);

/*
 * sysctl handler for neighbour parameters handled by
 * proc_dointvec_jiffies(); afterwards neigh_proc_update() records a write
 * and, for the default table, propagates it.
 *
 * Returns the result of proc_dointvec_jiffies().
 */
int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
				void __user *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);

/*
 * sysctl handler for neighbour parameters handled by
 * proc_dointvec_userhz_jiffies(); afterwards neigh_proc_update() records a
 * write and, for the default table, propagates it.
 *
 * Returns the result of proc_dointvec_userhz_jiffies().
 */
static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
					      void __user *buffer,
					      size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

/*
 * sysctl handler for neighbour parameters handled by
 * proc_dointvec_ms_jiffies(); afterwards neigh_proc_update() records a
 * write and, for the default table, propagates it.
 *
 * Returns the result of proc_dointvec_ms_jiffies().
 */
int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
				   void __user *buffer,
				   size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);

/*
 * sysctl handler for the "unres_qlen" entry: delegates the read/write to
 * proc_unres_qlen() (helper not visible in this part of the file) and then
 * runs neigh_proc_update() to record a write and, for the default table,
 * propagate it.
 *
 * Returns the result of proc_unres_qlen().
 */
static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036
/*
 * sysctl handler shared by "base_reachable_time" and
 * "base_reachable_time_ms".
 *
 * The entry's procname selects the jiffies or the ms_jiffies handler; any
 * other name yields -1.  After a successful write, p->reachable_time is
 * recomputed right away from the new BASE_REACHABLE_TIME value.
 *
 * Returns the selected handler's result, or -1 for an unexpected procname.
 */
static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret = -1;

	if (!strcmp(ctl->procname, "base_reachable_time"))
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (!strcmp(ctl->procname, "base_reachable_time_ms"))
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

	/* Nothing more to do for reads or failed writes. */
	if (!write || ret != 0)
		return ret;

	/*
	 * Refresh reachable_time immediately; otherwise the new base value
	 * would only be picked up the next time neigh_periodic_work decides
	 * to recompute it.
	 */
	p->reachable_time =
		neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	return ret;
}

J
Jiri Pirko 已提交
3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052
/*
 * Offset of data[index] inside struct neigh_parms, expressed as a pointer.
 * The template stores this offset in .data; neigh_sysctl_register() adds
 * the address of the real neigh_parms to turn it into a usable pointer.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/*
 * Build the template entry at slot NEIGH_VAR_<attr>.
 *   attr      - slot in neigh_vars[]
 *   data_attr - NEIGH_VAR_<data_attr> slot of neigh_parms.data[] backing it
 *   name      - procname of the sysctl file
 *   mval      - file mode
 *   proc      - proc_handler used for reads and writes
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

/* Entries whose slot and backing data index are the same, mode 0644. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/*
 * "Reused" entries: a separate sysctl slot (attr) that exposes the storage
 * of another parameter (data_attr) through a different handler.
 */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3066

L
Linus Torvalds 已提交
3067 3068
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
E
Eric Dumazet 已提交
3069
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3070
} neigh_sysctl_template __read_mostly = {
L
Linus Torvalds 已提交
3071
	.neigh_vars = {
J
Jiri Pirko 已提交
3072 3073 3074
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3075
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
J
Jiri Pirko 已提交
3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
E
Eric Dumazet 已提交
3088
		[NEIGH_VAR_GC_INTERVAL] = {
L
Linus Torvalds 已提交
3089 3090 3091
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
A
Alexey Dobriyan 已提交
3092
			.proc_handler	= proc_dointvec_jiffies,
L
Linus Torvalds 已提交
3093
		},
E
Eric Dumazet 已提交
3094
		[NEIGH_VAR_GC_THRESH1] = {
L
Linus Torvalds 已提交
3095 3096 3097
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
3098 3099 3100
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
L
Linus Torvalds 已提交
3101
		},
E
Eric Dumazet 已提交
3102
		[NEIGH_VAR_GC_THRESH2] = {
L
Linus Torvalds 已提交
3103 3104 3105
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
3106 3107 3108
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
L
Linus Torvalds 已提交
3109
		},
E
Eric Dumazet 已提交
3110
		[NEIGH_VAR_GC_THRESH3] = {
L
Linus Torvalds 已提交
3111 3112 3113
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
3114 3115 3116
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
L
Linus Torvalds 已提交
3117
		},
3118
		{},
L
Linus Torvalds 已提交
3119 3120 3121 3122
	},
};

int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3123
			  proc_handler *handler)
L
Linus Torvalds 已提交
3124
{
J
Jiri Pirko 已提交
3125
	int i;
3126
	struct neigh_sysctl_table *t;
J
Jiri Pirko 已提交
3127
	const char *dev_name_source;
3128
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3129
	char *p_name;
L
Linus Torvalds 已提交
3130

3131
	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
L
Linus Torvalds 已提交
3132
	if (!t)
3133 3134
		goto err;

3135
	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
J
Jiri Pirko 已提交
3136
		t->neigh_vars[i].data += (long) p;
J
Jiri Pirko 已提交
3137
		t->neigh_vars[i].extra1 = dev;
3138
		t->neigh_vars[i].extra2 = p;
J
Jiri Pirko 已提交
3139
	}
L
Linus Torvalds 已提交
3140 3141 3142

	if (dev) {
		dev_name_source = dev->name;
3143
		/* Terminate the table early */
E
Eric Dumazet 已提交
3144 3145
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
L
Linus Torvalds 已提交
3146
	} else {
3147
		struct neigh_table *tbl = p->tbl;
3148
		dev_name_source = "default";
3149 3150 3151 3152
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
L
Linus Torvalds 已提交
3153 3154
	}

3155
	if (handler) {
L
Linus Torvalds 已提交
3156
		/* RetransTime */
E
Eric Dumazet 已提交
3157
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
L
Linus Torvalds 已提交
3158
		/* ReachableTime */
E
Eric Dumazet 已提交
3159
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
L
Linus Torvalds 已提交
3160
		/* RetransTime (in milliseconds)*/
E
Eric Dumazet 已提交
3161
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
L
Linus Torvalds 已提交
3162
		/* ReachableTime (in milliseconds) */
E
Eric Dumazet 已提交
3163
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
L
Linus Torvalds 已提交
3177 3178
	}

3179 3180 3181 3182
	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193
	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

3194 3195
	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
3196
	t->sysctl_header =
3197
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3198
	if (!t->sysctl_header)
3199
		goto free;
3200

L
Linus Torvalds 已提交
3201 3202 3203
	p->sysctl_table = t;
	return 0;

3204
free:
L
Linus Torvalds 已提交
3205
	kfree(t);
3206 3207
err:
	return -ENOBUFS;
L
Linus Torvalds 已提交
3208
}
3209
EXPORT_SYMBOL(neigh_sysctl_register);
L
Linus Torvalds 已提交
3210 3211 3212 3213 3214 3215

void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
3216
		unregister_net_sysctl_table(t->sysctl_header);
L
Linus Torvalds 已提交
3217 3218 3219
		kfree(t);
	}
}
3220
EXPORT_SYMBOL(neigh_sysctl_unregister);
L
Linus Torvalds 已提交
3221 3222 3223

#endif	/* CONFIG_SYSCTL */

3224 3225
/*
 * Register the rtnetlink handlers of the neighbour subsystem for
 * PF_UNSPEC: add/delete/dump of neighbour entries and dump/set of
 * neighbour tables.  Always returns 0.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);