// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	NET3	IP device support routines.
 *
 *	Derived from the IP parts of dev.c 1.0.19
 * 		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	Changes:
 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
 *					lists.
 *		Cyrus Durgin:		updated for kmod
 *		Matthias Andree:	in devinet_ioctl, compare label and
 *					address (4.4BSD alias style support),
 *					fall back to comparing just the label
 *					if no match found.
 */


25
#include <linux/uaccess.h>
L
Linus Torvalds 已提交
26
#include <linux/bitops.h>
27
#include <linux/capability.h>
L
Linus Torvalds 已提交
28 29 30
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
31
#include <linux/sched/signal.h>
L
Linus Torvalds 已提交
32 33 34 35 36 37 38
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
39
#include <linux/if_addr.h>
L
Linus Torvalds 已提交
40 41 42 43 44 45 46 47 48
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
49
#include <linux/slab.h>
50
#include <linux/hash.h>
L
Linus Torvalds 已提交
51 52 53 54
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/kmod.h>
55
#include <linux/netconf.h>
L
Linus Torvalds 已提交
56

57
#include <net/arp.h>
L
Linus Torvalds 已提交
58 59 60
#include <net/ip.h>
#include <net/route.h>
#include <net/ip_fib.h>
61
#include <net/rtnetlink.h>
62
#include <net/net_namespace.h>
J
Jiri Pirko 已提交
63
#include <net/addrconf.h>
L
Linus Torvalds 已提交
64

65 66 67 68 69
/*
 * Address flags that only make sense for IPv6.  __inet_insert_ifa()
 * clears this mask from every IPv4 address before linking it.
 */
#define IPV6ONLY_FLAGS			\
	(IFA_F_NODAD |			\
	 IFA_F_OPTIMISTIC |		\
	 IFA_F_DADFAILED |		\
	 IFA_F_HOMEADDRESS |		\
	 IFA_F_TENTATIVE |		\
	 IFA_F_MANAGETEMPADDR |		\
	 IFA_F_STABLE_PRIVACY)

70
static struct ipv4_devconf ipv4_devconf = {
71
	.data = {
72 73 74 75
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 77
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79
	},
L
Linus Torvalds 已提交
80 81 82
};

static struct ipv4_devconf ipv4_devconf_dflt = {
83
	.data = {
84 85 86 87 88
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 90
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92
	},
L
Linus Torvalds 已提交
93 94
};

95 96
/*
 * Access attribute @attr of the default devconf of namespace @net.
 * @net is parenthesised so that any pointer-valued expression can be
 * passed, not only a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
97

98
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 100 101
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
J
Jiri Pirko 已提交
103
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104
	[IFA_FLAGS]		= { .type = NLA_U32 },
105
	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106
	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 108
};

C
Christian Brauner 已提交
109 110 111 112 113 114
/*
 * Bundle of arguments describing one address netlink message.
 * NOTE(review): the users of this struct are outside this part of the
 * file; the per-field notes below are inferred from the field names
 * and should be confirmed against the fill/dump code.
 */
struct inet_fill_args {
	u32 portid;		/* presumably the netlink port id of the requester */
	u32 seq;		/* presumably the netlink sequence number */
	int event;		/* presumably RTM_NEWADDR / RTM_DELADDR */
	unsigned int flags;	/* presumably the NLM_F_* message flags */
	int netnsid;		/* presumably the target netns id */
	int ifindex;		/* presumably an interface index filter */
};

E
Eric Dumazet 已提交
118 119 120
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

121 122
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];

123
/*
 * Compute the inet_addr_lst[] bucket for (@net, @addr): the address is
 * XORed with net_hash_mix(@net) and reduced to IN4_ADDR_HSIZE_SHIFT bits
 * by hash_32().
 */
static u32 inet_addr_hash(const struct net *net, __be32 addr)
{
	u32 val = (__force u32) addr ^ net_hash_mix(net);

	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
}

static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
{
E
Eric Dumazet 已提交
132
	u32 hash = inet_addr_hash(net, ifa->ifa_local);
133

134
	ASSERT_RTNL();
135 136 137 138 139
	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
}

static void inet_hash_remove(struct in_ifaddr *ifa)
{
140
	ASSERT_RTNL();
141 142 143
	hlist_del_init_rcu(&ifa->hash);
}

144 145 146 147 148 149 150 151 152 153 154 155 156 157
/**
 * __ip_dev_find - find the first device with a given source address.
 * @net: the net namespace
 * @addr: the source address
 * @devref: if true, take a reference on the found device
 *
 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 *
 * Return: the device owning @addr, or NULL when neither the address hash
 * table nor the local FIB table yields a match.
 */
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
	struct net_device *result = NULL;
	struct in_ifaddr *ifa;

	rcu_read_lock();
	ifa = inet_lookup_ifaddr_rcu(net, addr);
	if (!ifa) {
		struct flowi4 fl4 = { .daddr = addr };
		struct fib_result res = { 0 };
		struct fib_table *local;

		/* Fallback to FIB local table so that communication
		 * over loopback subnets work.
		 */
		local = fib_get_table(net, RT_TABLE_LOCAL);
		if (local &&
		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
		    res.type == RTN_LOCAL)
			result = FIB_RES_DEV(res);
	} else {
		result = ifa->ifa_dev->dev;
	}
	/* Take the reference while still inside the RCU section. */
	if (result && devref)
		dev_hold(result);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(__ip_dev_find);

182 183 184 185 186 187 188 189 190 191 192 193 194 195
/*
 * Look up the in_ifaddr whose local address is @addr and whose device
 * belongs to @net.  Returns NULL when there is none.
 *
 * Must be called under the RCU read lock.
 */
struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
{
	struct hlist_head *bucket = &inet_addr_lst[inet_addr_hash(net, addr)];
	struct in_ifaddr *ifa;

	hlist_for_each_entry_rcu(ifa, bucket, hash) {
		if (ifa->ifa_local != addr)
			continue;
		/* Buckets are shared between namespaces; filter on @net. */
		if (net_eq(dev_net(ifa->ifa_dev->dev), net))
			return ifa;
	}

	return NULL;
}

196
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
L
Linus Torvalds 已提交
197

198
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
199
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
200 201
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
L
Linus Torvalds 已提交
202 203
			 int destroy);
#ifdef CONFIG_SYSCTL
/* Real implementations are provided elsewhere when sysctl support is built. */
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL, registration is a no-op that reports success. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
/* Without CONFIG_SYSCTL there is nothing to unregister. */
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif

/* Locks all the inet devices. */

static struct in_ifaddr *inet_alloc_ifa(void)
{
220
	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
L
Linus Torvalds 已提交
221 222 223 224 225 226 227 228 229 230
}

static void inet_rcu_free_ifa(struct rcu_head *head)
{
	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
	if (ifa->ifa_dev)
		in_dev_put(ifa->ifa_dev);
	kfree(ifa);
}

E
Eric Dumazet 已提交
231
static void inet_free_ifa(struct in_ifaddr *ifa)
L
Linus Torvalds 已提交
232 233 234 235 236 237 238 239
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}

/*
 * Final teardown of @idev: warn if address or multicast lists are still
 * populated, free the multicast hash, and drop the tracked reference on
 * the underlying net_device.  The in_device itself is only freed when it
 * has been marked dead; otherwise an error is logged and it is left alone.
 */
void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	WARN_ON(idev->ifa_list);
	WARN_ON(idev->mc_list);
	kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
	dev_put_track(dev, &idev->dev_tracker);
	if (!idev->dead)
		pr_err("Freeing alive in_device %p\n", idev);
	else
		kfree(idev);
}
EXPORT_SYMBOL(in_dev_finish_destroy);
L
Linus Torvalds 已提交
253

254
static struct in_device *inetdev_init(struct net_device *dev)
L
Linus Torvalds 已提交
255 256
{
	struct in_device *in_dev;
257
	int err = -ENOMEM;
L
Linus Torvalds 已提交
258 259 260

	ASSERT_RTNL();

261
	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
L
Linus Torvalds 已提交
262 263
	if (!in_dev)
		goto out;
264
	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
265
			sizeof(in_dev->cnf));
L
Linus Torvalds 已提交
266 267
	in_dev->cnf.sysctl = NULL;
	in_dev->dev = dev;
E
Eric Dumazet 已提交
268 269
	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
	if (!in_dev->arp_parms)
L
Linus Torvalds 已提交
270
		goto out_kfree;
271 272
	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
		dev_disable_lro(dev);
L
Linus Torvalds 已提交
273
	/* Reference in_dev->dev */
274
	dev_hold_track(dev, &in_dev->dev_tracker, GFP_KERNEL);
275
	/* Account for reference dev->ip_ptr (below) */
276
	refcount_set(&in_dev->refcnt, 1);
L
Linus Torvalds 已提交
277

278 279 280
	err = devinet_sysctl_register(in_dev);
	if (err) {
		in_dev->dead = 1;
281
		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
282 283 284 285
		in_dev_put(in_dev);
		in_dev = NULL;
		goto out;
	}
L
Linus Torvalds 已提交
286 287 288
	ip_mc_init_dev(in_dev);
	if (dev->flags & IFF_UP)
		ip_mc_up(in_dev);
289

290
	/* we can receive as soon as ip_ptr is set -- do this last */
291
	rcu_assign_pointer(dev->ip_ptr, in_dev);
292
out:
293
	return in_dev ?: ERR_PTR(err);
L
Linus Torvalds 已提交
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308
out_kfree:
	kfree(in_dev);
	in_dev = NULL;
	goto out;
}

/*
 * RCU callback queued by inetdev_destroy(): drop one reference on the
 * in_device that embeds @head.
 */
static void in_dev_rcu_put(struct rcu_head *head)
{
	in_dev_put(container_of(head, struct in_device, rcu_head));
}

static void inetdev_destroy(struct in_device *in_dev)
{
	struct net_device *dev;
309
	struct in_ifaddr *ifa;
L
Linus Torvalds 已提交
310 311 312 313 314 315 316 317 318

	ASSERT_RTNL();

	dev = in_dev->dev;

	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

319
	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
L
Linus Torvalds 已提交
320 321 322 323
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

324
	RCU_INIT_POINTER(dev->ip_ptr, NULL);
L
Linus Torvalds 已提交
325

326
	devinet_sysctl_unregister(in_dev);
L
Linus Torvalds 已提交
327 328 329 330 331 332
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}

A
Al Viro 已提交
333
/*
 * Check whether @a (and @b, unless @b is zero) match one and the same
 * address entry of @in_dev according to inet_ifa_match().
 *
 * Takes the RCU read lock itself.  Returns 1 on a match, 0 otherwise.
 */
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
	const struct in_ifaddr *ifa;

	rcu_read_lock();
	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		if (inet_ifa_match(a, ifa)) {
			if (!b || inet_ifa_match(b, ifa)) {
				rcu_read_unlock();
				return 1;
			}
		}
	}
	rcu_read_unlock();
	return 0;
}

350 351 352
/*
 * Remove the address *@ifap from @in_dev.
 *
 * @in_dev:  device state owning the address
 * @ifap:    link (list head or previous ifa_next) pointing at the address
 * @destroy: when non-zero the removed address is freed here
 * @nlh:     netlink request that triggered the removal, or NULL
 * @portid:  netlink port id forwarded to rtmsg_ifa()
 *
 * When a primary address goes away its secondaries in the same subnet
 * are either deleted as well or, if IN_DEV_PROMOTE_SECONDARIES() is set,
 * the first one is promoted to primary.  Every removal and promotion is
 * announced through rtmsg_ifa() and the inetaddr_chain notifier.
 * Must be called with RTNL held.
 */
static void __inet_del_ifa(struct in_device *in_dev,
			   struct in_ifaddr __rcu **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1;
	struct in_ifaddr *last_prim;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	ifa1 = rtnl_dereference(*ifap);
	last_prim = rtnl_dereference(in_dev->ifa_list);
	/* A dying device gets no promotion handling at all. */
	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;

		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec;

		next_sec = rtnl_dereference(promote->ifa_next);
		if (prev_prom) {
			struct in_ifaddr *last_sec;

			/* Move the promoted entry right behind last_prim. */
			rcu_assign_pointer(prev_prom->ifa_next, next_sec);

			last_sec = rtnl_dereference(last_prim->ifa_next);
			rcu_assign_pointer(promote->ifa_next, last_sec);
			rcu_assign_pointer(last_prim->ifa_next, promote);
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes of the remaining secondaries of the subnet. */
		for (ifa = next_sec; ifa;
		     ifa = rtnl_dereference(ifa->ifa_next)) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}

463 464
/*
 * Convenience wrapper around __inet_del_ifa() for removals that are not
 * tied to a netlink request (no nlmsghdr, port id 0).
 */
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}

J
Jiri Pirko 已提交
470 471 472 473
/* Work handler that expires and deprecates addresses; defined below. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);

474
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
475
			     u32 portid, struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
476
{
477
	struct in_ifaddr __rcu **last_primary, **ifap;
L
Linus Torvalds 已提交
478
	struct in_device *in_dev = ifa->ifa_dev;
479
	struct in_validator_info ivi;
480
	struct in_ifaddr *ifa1;
481
	int ret;
L
Linus Torvalds 已提交
482 483 484 485 486 487 488 489 490 491 492

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

493 494 495
	/* Don't set IPv6 only flags to IPv4 addresses */
	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;

496 497 498 499
	ifap = &in_dev->ifa_list;
	ifa1 = rtnl_dereference(*ifap);

	while (ifa1) {
L
Linus Torvalds 已提交
500 501 502 503 504 505 506 507 508 509 510 511 512 513 514
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
515 516 517

		ifap = &ifa1->ifa_next;
		ifa1 = rtnl_dereference(*ifap);
L
Linus Torvalds 已提交
518 519
	}

520 521 522 523 524 525 526 527 528
	/* Allow any devices that wish to register ifaddr validtors to weigh
	 * in now, before changes are committed.  The rntl lock is serializing
	 * access here, so the state should not change between a validator call
	 * and a final notify on commit.  This isn't invoked on promotion under
	 * the assumption that validators are checking the address itself, and
	 * not the flags.
	 */
	ivi.ivi_addr = ifa->ifa_address;
	ivi.ivi_dev = ifa->ifa_dev;
529
	ivi.extack = extack;
530 531 532 533 534 535 536 537
	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
					   NETDEV_UP, &ivi);
	ret = notifier_to_errno(ret);
	if (ret) {
		inet_free_ifa(ifa);
		return ret;
	}

L
Linus Torvalds 已提交
538
	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
539
		prandom_seed((__force u32) ifa->ifa_local);
L
Linus Torvalds 已提交
540 541 542
		ifap = last_primary;
	}

543 544
	rcu_assign_pointer(ifa->ifa_next, *ifap);
	rcu_assign_pointer(*ifap, ifa);
L
Linus Torvalds 已提交
545

546 547
	inet_hash_insert(dev_net(in_dev->dev), ifa);

J
Jiri Pirko 已提交
548
	cancel_delayed_work(&check_lifetime_work);
549
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
J
Jiri Pirko 已提交
550

L
Linus Torvalds 已提交
551 552 553
	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
554
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
555
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
L
Linus Torvalds 已提交
556 557 558 559

	return 0;
}

560 561
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
562
	return __inet_insert_ifa(ifa, NULL, 0, NULL);
563 564
}

L
Linus Torvalds 已提交
565 566
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
567
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
L
Linus Torvalds 已提交
568 569 570 571

	ASSERT_RTNL();

	if (!in_dev) {
572 573
		inet_free_ifa(ifa);
		return -ENOBUFS;
L
Linus Torvalds 已提交
574
	}
575
	ipv4_devconf_setall(in_dev);
576
	neigh_parms_data_state_setall(in_dev->arp_parms);
L
Linus Torvalds 已提交
577
	if (ifa->ifa_dev != in_dev) {
578
		WARN_ON(ifa->ifa_dev);
L
Linus Torvalds 已提交
579 580 581
		in_dev_hold(in_dev);
		ifa->ifa_dev = in_dev;
	}
582
	if (ipv4_is_loopback(ifa->ifa_local))
L
Linus Torvalds 已提交
583 584 585 586
		ifa->ifa_scope = RT_SCOPE_HOST;
	return inet_insert_ifa(ifa);
}

587 588 589
/* Caller must hold RCU or RTNL :
 * We dont take a reference on found in_device
 */
590
struct in_device *inetdev_by_index(struct net *net, int ifindex)
L
Linus Torvalds 已提交
591 592 593
{
	struct net_device *dev;
	struct in_device *in_dev = NULL;
594 595 596

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
L
Linus Torvalds 已提交
597
	if (dev)
598
		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
599
	rcu_read_unlock();
L
Linus Torvalds 已提交
600 601
	return in_dev;
}
E
Eric Dumazet 已提交
602
EXPORT_SYMBOL(inetdev_by_index);
L
Linus Torvalds 已提交
603 604 605

/* Called only from RTNL semaphored context. No locks. */

A
Al Viro 已提交
606 607
/*
 * Return the first address of @in_dev whose mask equals @mask and which
 * matches @prefix according to inet_ifa_match(), or NULL if none does.
 * Must be called with RTNL held.
 */
struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
				    __be32 mask)
{
	struct in_ifaddr *ifa;

	ASSERT_RTNL();

	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
			return ifa;
	}
	return NULL;
}

620 621
/*
 * Join (@join true) or leave (@join false) the multicast group given by
 * ifa->ifa_address on the interface of @ifa, using the namespace's
 * mc_autojoin_sk socket.  Must be called with RTNL held.
 *
 * Returns the result of ip_mc_join_group() / ip_mc_leave_group(), or
 * -EOPNOTSUPP when the kernel is built without CONFIG_IP_MULTICAST.
 */
static int ip_mc_autojoin_config(struct net *net, bool join,
				 const struct in_ifaddr *ifa)
{
#if defined(CONFIG_IP_MULTICAST)
	struct ip_mreqn mreq = {
		.imr_multiaddr.s_addr = ifa->ifa_address,
		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
	};
	struct sock *sk = net->ipv4.mc_autojoin_sk;
	int ret;

	ASSERT_RTNL();

	lock_sock(sk);
	if (join)
		ret = ip_mc_join_group(sk, &mreq);
	else
		ret = ip_mc_leave_group(sk, &mreq);
	release_sock(sk);

	return ret;
#else
	return -EOPNOTSUPP;
#endif
}

646 647
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
648
{
649
	struct net *net = sock_net(skb->sk);
650
	struct in_ifaddr __rcu **ifap;
651
	struct nlattr *tb[IFA_MAX+1];
L
Linus Torvalds 已提交
652
	struct in_device *in_dev;
653
	struct ifaddrmsg *ifm;
654
	struct in_ifaddr *ifa;
655
	int err;
L
Linus Torvalds 已提交
656 657 658

	ASSERT_RTNL();

659 660
	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
				     ifa_ipv4_policy, extack);
661 662 663 664
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
665
	in_dev = inetdev_by_index(net, ifm->ifa_index);
666
	if (!in_dev) {
667 668 669 670
		err = -ENODEV;
		goto errout;
	}

671
	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
L
Linus Torvalds 已提交
672
	     ifap = &ifa->ifa_next) {
673
		if (tb[IFA_LOCAL] &&
674
		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
675 676 677
			continue;

		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
L
Linus Torvalds 已提交
678
			continue;
679 680 681

		if (tb[IFA_ADDRESS] &&
		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
682
		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
683 684
			continue;

685
		if (ipv4_is_multicast(ifa->ifa_address))
686
			ip_mc_autojoin_config(net, false, ifa);
687
		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
L
Linus Torvalds 已提交
688 689
		return 0;
	}
690 691 692 693

	err = -EADDRNOTAVAIL;
errout:
	return err;
L
Linus Torvalds 已提交
694 695
}

J
Jiri Pirko 已提交
696 697 698 699 700 701
#define INFINITY_LIFE_TIME	0xFFFFFFFF

static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
702
	struct hlist_node *n;
J
Jiri Pirko 已提交
703 704 705 706 707 708
	int i;

	now = jiffies;
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
709 710 711
		bool change_needed = false;

		rcu_read_lock();
712
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
J
Jiri Pirko 已提交
713 714 715 716 717 718 719 720 721 722 723
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
724
				change_needed = true;
J
Jiri Pirko 已提交
725 726 727 728 729 730 731 732 733
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

734 735
				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
J
Jiri Pirko 已提交
736 737 738 739 740 741 742
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758
		rcu_read_unlock();
		if (!change_needed)
			continue;
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
759 760 761 762 763 764
				struct in_ifaddr __rcu **ifap;
				struct in_ifaddr *tmp;

				ifap = &ifa->ifa_dev->ifa_list;
				tmp = rtnl_dereference(*ifap);
				while (tmp) {
765
					if (tmp == ifa) {
766 767 768 769
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
770 771
					ifap = &tmp->ifa_next;
					tmp = rtnl_dereference(*ifap);
772 773 774 775 776 777 778 779 780 781
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
J
Jiri Pirko 已提交
782 783 784 785 786 787 788 789 790 791 792 793 794 795
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

796 797
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
J
Jiri Pirko 已提交
798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824
}

/*
 * Store new valid/preferred lifetimes in @ifa and restart its timestamp.
 *
 * IFA_F_PERMANENT and IFA_F_DEPRECATED are recomputed from scratch: a
 * non-finite valid lifetime marks the address permanent, a finite
 * preferred lifetime of zero marks it deprecated.  The creation stamp is
 * only filled in when it is still zero.
 */
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
			     __u32 prefered_lft)
{
	unsigned long valid_to, pref_to;

	/* Both flags are re-derived below. */
	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);

	valid_to = addrconf_timeout_fixup(valid_lft, HZ);
	if (!addrconf_finite_timeout(valid_to))
		ifa->ifa_flags |= IFA_F_PERMANENT;
	else
		ifa->ifa_valid_lft = valid_to;

	pref_to = addrconf_timeout_fixup(prefered_lft, HZ);
	if (addrconf_finite_timeout(pref_to)) {
		ifa->ifa_preferred_lft = pref_to;
		if (pref_to == 0)
			ifa->ifa_flags |= IFA_F_DEPRECATED;
	}

	ifa->ifa_tstamp = jiffies;
	if (ifa->ifa_cstamp == 0)
		ifa->ifa_cstamp = ifa->ifa_tstamp;
}

/*
 * Build a new in_ifaddr from an RTM address netlink message.
 *
 * @net:           namespace used to resolve the interface index
 * @nlh:           netlink message to parse
 * @pvalid_lft:    set to the valid lifetime when IFA_CACHEINFO is present
 * @pprefered_lft: set to the preferred lifetime when IFA_CACHEINFO is present
 * @extack:        extended ack for parse errors
 *
 * The returned address holds a reference on the interface's in_device
 * and is not yet linked anywhere.
 *
 * Returns the new address or an ERR_PTR(): a parse error, -EINVAL
 * (prefix length > 32, missing IFA_LOCAL, or inconsistent cacheinfo),
 * -ENODEV (no such interface) or -ENOBUFS (no in_device or allocation
 * failure).
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
				       __u32 *pvalid_lft, __u32 *pprefered_lft,
				       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
				     ifa_ipv4_policy, extack);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (!dev)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (!in_dev)
		goto errout;

	ifa = inet_alloc_ifa();
	if (!ifa)
		/*
		 * A potential indev allocation can be left alive, it stays
		 * assigned to its device and is destroyed with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	in_dev_hold(in_dev);

	/* Without an explicit IFA_ADDRESS it equals the local address. */
	if (!tb[IFA_ADDRESS])
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
					 ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);

	if (tb[IFA_LABEL])
		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	if (tb[IFA_RT_PRIORITY])
		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);

	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
			err = -EINVAL;
			goto errout_free;
		}
		*pvalid_lft = ci->ifa_valid;
		*pprefered_lft = ci->ifa_prefered;
	}

	return ifa;

errout_free:
	/* Freeing the address also drops the in_device reference taken above. */
	inet_free_ifa(ifa);
errout:
	return ERR_PTR(err);
}

J
Jiri Pirko 已提交
912 913 914
static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
915
	struct in_ifaddr *ifa1;
J
Jiri Pirko 已提交
916 917 918 919

	if (!ifa->ifa_local)
		return NULL;

920
	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
J
Jiri Pirko 已提交
921 922 923 924 925 926 927 928
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa) &&
		    ifa1->ifa_local == ifa->ifa_local)
			return ifa1;
	}
	return NULL;
}

929 930
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack)
931
{
932
	struct net *net = sock_net(skb->sk);
933
	struct in_ifaddr *ifa;
J
Jiri Pirko 已提交
934 935 936
	struct in_ifaddr *ifa_existing;
	__u32 valid_lft = INFINITY_LIFE_TIME;
	__u32 prefered_lft = INFINITY_LIFE_TIME;
937 938 939

	ASSERT_RTNL();

D
David Ahern 已提交
940
	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
941 942 943
	if (IS_ERR(ifa))
		return PTR_ERR(ifa);

J
Jiri Pirko 已提交
944 945 946
	ifa_existing = find_matching_ifa(ifa);
	if (!ifa_existing) {
		/* It would be best to check for !NLM_F_CREATE here but
S
stephen hemminger 已提交
947
		 * userspace already relies on not having to provide this.
J
Jiri Pirko 已提交
948 949
		 */
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
950
		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
951
			int ret = ip_mc_autojoin_config(net, true, ifa);
952 953 954 955 956 957

			if (ret < 0) {
				inet_free_ifa(ifa);
				return ret;
			}
		}
958 959
		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
					 extack);
J
Jiri Pirko 已提交
960
	} else {
961 962
		u32 new_metric = ifa->ifa_rt_priority;

J
Jiri Pirko 已提交
963 964 965 966 967
		inet_free_ifa(ifa);

		if (nlh->nlmsg_flags & NLM_F_EXCL ||
		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
			return -EEXIST;
968
		ifa = ifa_existing;
969 970 971 972 973 974

		if (ifa->ifa_rt_priority != new_metric) {
			fib_modify_prefix_metric(ifa, new_metric);
			ifa->ifa_rt_priority = new_metric;
		}

975
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
976
		cancel_delayed_work(&check_lifetime_work);
977 978
		queue_delayed_work(system_power_efficient_wq,
				&check_lifetime_work, 0);
979
		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
J
Jiri Pirko 已提交
980 981
	}
	return 0;
L
Linus Torvalds 已提交
982 983 984 985 986 987
}

/*
 *	Determine a default network mask, based on the IP address.
 */

E
Eric Dumazet 已提交
988
static int inet_abc_len(__be32 addr)
L
Linus Torvalds 已提交
989 990 991
{
	int rc = -1;	/* Something else, probably a multicast. */

992
	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
993
		rc = 0;
L
Linus Torvalds 已提交
994
	else {
995 996
		__u32 haddr = ntohl(addr);
		if (IN_CLASSA(haddr))
L
Linus Torvalds 已提交
997
			rc = 8;
998
		else if (IN_CLASSB(haddr))
L
Linus Torvalds 已提交
999
			rc = 16;
1000
		else if (IN_CLASSC(haddr))
L
Linus Torvalds 已提交
1001
			rc = 24;
1002 1003
		else if (IN_CLASSE(haddr))
			rc = 32;
L
Linus Torvalds 已提交
1004 1005
	}

1006
	return rc;
L
Linus Torvalds 已提交
1007 1008 1009
}


1010
int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
L
Linus Torvalds 已提交
1011 1012
{
	struct sockaddr_in sin_orig;
1013
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1014
	struct in_ifaddr __rcu **ifap = NULL;
L
Linus Torvalds 已提交
1015 1016 1017 1018 1019 1020 1021
	struct in_device *in_dev;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

1022
	ifr->ifr_name[IFNAMSIZ - 1] = 0;
L
Linus Torvalds 已提交
1023 1024 1025 1026

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

1027
	colon = strchr(ifr->ifr_name, ':');
L
Linus Torvalds 已提交
1028 1029 1030
	if (colon)
		*colon = 0;

1031
	dev_load(net, ifr->ifr_name);
L
Linus Torvalds 已提交
1032

S
Stephen Hemminger 已提交
1033
	switch (cmd) {
L
Linus Torvalds 已提交
1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
1048
		ret = -EPERM;
1049
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
L
Linus Torvalds 已提交
1050 1051 1052 1053 1054 1055
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1056
		ret = -EPERM;
1057
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
L
Linus Torvalds 已提交
1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
1071
	dev = __dev_get_by_name(net, ifr->ifr_name);
E
Eric Dumazet 已提交
1072
	if (!dev)
L
Linus Torvalds 已提交
1073 1074 1075 1076 1077
		goto done;

	if (colon)
		*colon = ':';

E
Eric Dumazet 已提交
1078 1079
	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
L
Linus Torvalds 已提交
1080 1081 1082 1083 1084 1085
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
1086 1087 1088

			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
L
Linus Torvalds 已提交
1089
			     ifap = &ifa->ifa_next) {
1090
				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
L
Linus Torvalds 已提交
1091
				    sin_orig.sin_addr.s_addr ==
1092
							ifa->ifa_local) {
L
Linus Torvalds 已提交
1093 1094 1095 1096 1097 1098 1099 1100
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
1101 1102
			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
L
Linus Torvalds 已提交
1103
			     ifap = &ifa->ifa_next)
1104
				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
L
Linus Torvalds 已提交
1105 1106 1107 1108 1109 1110 1111 1112
					break;
		}
	}

	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

S
Stephen Hemminger 已提交
1113
	switch (cmd) {
L
Linus Torvalds 已提交
1114
	case SIOCGIFADDR:	/* Get interface address */
1115
		ret = 0;
L
Linus Torvalds 已提交
1116
		sin->sin_addr.s_addr = ifa->ifa_local;
1117
		break;
L
Linus Torvalds 已提交
1118 1119

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1120
		ret = 0;
L
Linus Torvalds 已提交
1121
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1122
		break;
L
Linus Torvalds 已提交
1123 1124

	case SIOCGIFDSTADDR:	/* Get the destination address */
1125
		ret = 0;
L
Linus Torvalds 已提交
1126
		sin->sin_addr.s_addr = ifa->ifa_address;
1127
		break;
L
Linus Torvalds 已提交
1128 1129

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1130
		ret = 0;
L
Linus Torvalds 已提交
1131
		sin->sin_addr.s_addr = ifa->ifa_mask;
1132
		break;
L
Linus Torvalds 已提交
1133 1134 1135 1136 1137 1138 1139

	case SIOCSIFFLAGS:
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
1140
			if (!(ifr->ifr_flags & IFF_UP))
L
Linus Torvalds 已提交
1141 1142 1143
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
1144
		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
L
Linus Torvalds 已提交
1145 1146 1147 1148 1149 1150 1151 1152 1153
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
E
Eric Dumazet 已提交
1154 1155
			ifa = inet_alloc_ifa();
			if (!ifa)
L
Linus Torvalds 已提交
1156
				break;
1157
			INIT_HLIST_NODE(&ifa->hash);
L
Linus Torvalds 已提交
1158
			if (colon)
1159
				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
L
Linus Torvalds 已提交
1160 1161 1162 1163 1164 1165 1166 1167
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
1168
			ifa->ifa_scope = 0;
L
Linus Torvalds 已提交
1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
J
Jiri Pirko 已提交
1184
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
L
Linus Torvalds 已提交
1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
A
Al Viro 已提交
1220
			__be32 old_mask = ifa->ifa_mask;
L
Linus Torvalds 已提交
1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
1234
			     (ifa->ifa_local|~old_mask))) {
L
Linus Torvalds 已提交
1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
}

1248
/*
 * Report the IPv4 addresses of @dev as an array of ifreq-like records.
 *
 * @dev:  device whose address list is walked
 * @buf:  user buffer to fill, or NULL to only compute the space needed
 * @len:  bytes available in @buf
 * @size: size of one record; must not exceed sizeof(struct ifreq)
 *
 * Each record carries the address label in ifr_name and the local address
 * as an AF_INET sockaddr.  Returns the number of bytes written (or that
 * would be needed when @buf is NULL), 0 if the device has no in_device or
 * @size is too large, or -EFAULT if copying to user space fails.
 */
int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	const struct in_ifaddr *ifa;
	struct ifreq ifr;
	int done = 0;

	if (WARN_ON(size > sizeof(struct ifreq)))
		goto out;

	if (!in_dev)
		goto out;

	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
		if (!buf) {
			/* Sizing pass: count the record, copy nothing */
			done += size;
			continue;
		}
		if (len < size)
			break;
		/* Zero the record so no stale stack bytes reach user space */
		memset(&ifr, 0, sizeof(struct ifreq));
		strcpy(ifr.ifr_name, ifa->ifa_label);

		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
								ifa->ifa_local;

		if (copy_to_user(buf + done, &ifr, size)) {
			done = -EFAULT;
			break;
		}
		len  -= size;
		done += size;
	}
out:
	return done;
}

1286 1287 1288
/*
 * Return the local address of the first entry on @in_dev that is not
 * flagged IFA_F_SECONDARY, is not link-scoped, and whose scope value is
 * <= @scope.  Returns 0 when no address qualifies.  Walks the list with the
 * RCU iterator.
 */
static __be32 in_dev_select_addr(const struct in_device *in_dev,
				 int scope)
{
	const struct in_ifaddr *ifa;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		if (ifa->ifa_flags & IFA_F_SECONDARY)
			continue;
		if (ifa->ifa_scope != RT_SCOPE_LINK &&
		    ifa->ifa_scope <= scope)
			return ifa->ifa_local;
	}

	return 0;
}

A
Al Viro 已提交
1302
/*
 * Select a local IPv4 address.
 *
 * @dev:   preferred device
 * @dst:   destination used for subnet matching, or 0 for no preference
 * @scope: maximum allowed scope value for the selected address
 *
 * Search order:
 *  1. addresses on @dev that are not IFA_F_SECONDARY and are within @scope;
 *     one whose subnet matches @dst wins, otherwise the first acceptable
 *     one is used;
 *  2. the l3mdev master device of @dev, if it has one;
 *  3. every device in the namespace that shares the same master index.
 *
 * Returns the chosen address, or 0 if none was found.  The whole search
 * runs inside an RCU read-side critical section.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	unsigned char localnet_scope = RT_SCOPE_HOST;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	/* With route_localnet enabled, host-scoped addresses are treated
	 * as if they had link scope for the comparison below.
	 */
	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		if (ifa->ifa_flags & IFA_F_SECONDARY)
			continue;
		if (min(ifa->ifa_scope, localnet_scope) > scope)
			continue;
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* Remember the first acceptable address as a fallback */
		if (!addr)
			addr = ifa->ifa_local;
	}

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* For VRFs, the VRF device takes the place of the loopback device,
	 * with addresses on it being preferred.  Note in such cases the
	 * loopback device will be among the devices that fail the master_idx
	 * equality check in the loop below.
	 */
	if (master_idx &&
	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
	    (in_dev = __in_dev_get_rcu(dev))) {
		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
L
Linus Torvalds 已提交
1371

A
Al Viro 已提交
1372 1373
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Scans the addresses of @in_dev for (a) a candidate source address: one
 * equal to @local, or any if @local is 0, with scope value <= @scope; and
 * (b) an entry whose subnet contains both @local and @dst, where a zero
 * value matches anything ("same").
 *
 * Returns the candidate address if a matching subnet was found, else 0.
 * Uses the RCU list iterator.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	unsigned char localnet_scope = RT_SCOPE_HOST;
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	int same = 0;

	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);

		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    min_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (min_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	}

	return same ? addr : 0;
}

/*
 * Confirm that local IP address exists using wildcards:
1418 1419
 * - net: netns to check, cannot be NULL
 * - in_dev: only on this interface, NULL=any interface
L
Linus Torvalds 已提交
1420 1421 1422 1423
 * - dst: only in the same subnet as dst, 0=any dst
 * - local: address, 0=autoselect the local address
 * - scope: maximum allowed scope value for the local address
 */
1424
__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1425
			 __be32 dst, __be32 local, int scope)
L
Linus Torvalds 已提交
1426
{
A
Al Viro 已提交
1427
	__be32 addr = 0;
1428
	struct net_device *dev;
L
Linus Torvalds 已提交
1429

1430
	if (in_dev)
1431
		return confirm_addr_indev(in_dev, dst, local, scope);
L
Linus Torvalds 已提交
1432 1433

	rcu_read_lock();
1434
	for_each_netdev_rcu(net, dev) {
E
Eric Dumazet 已提交
1435 1436
		in_dev = __in_dev_get_rcu(dev);
		if (in_dev) {
L
Linus Torvalds 已提交
1437 1438 1439 1440 1441 1442 1443 1444 1445
			addr = confirm_addr_indev(in_dev, dst, local, scope);
			if (addr)
				break;
		}
	}
	rcu_read_unlock();

	return addr;
}
1446
EXPORT_SYMBOL(inet_confirm_addr);
L
Linus Torvalds 已提交
1447 1448 1449 1450 1451 1452 1453

/*
 *	Device notifier
 */

int register_inetaddr_notifier(struct notifier_block *nb)
{
1454
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
L
Linus Torvalds 已提交
1455
}
E
Eric Dumazet 已提交
1456
EXPORT_SYMBOL(register_inetaddr_notifier);
L
Linus Torvalds 已提交
1457 1458 1459

int unregister_inetaddr_notifier(struct notifier_block *nb)
{
1460
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
L
Linus Torvalds 已提交
1461
}
E
Eric Dumazet 已提交
1462
EXPORT_SYMBOL(unregister_inetaddr_notifier);
L
Linus Torvalds 已提交
1463

1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476
/*
 * Add @nb to the inetaddr_validator_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_inetaddr_validator_notifier(struct notifier_block *nb)
{
	int rc;

	rc = blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
	return rc;
}
EXPORT_SYMBOL(register_inetaddr_validator_notifier);

/*
 * Remove @nb from the inetaddr_validator_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
{
	int rc;

	rc = blocking_notifier_chain_unregister(&inetaddr_validator_chain, nb);
	return rc;
}
EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);

E
Eric Dumazet 已提交
1477 1478
/* Rename ifa_labels for a device name change. Make some effort to preserve
 * existing alias numbering and to create unique labels if possible.
L
Linus Torvalds 已提交
1479 1480
*/
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1481
{
L
Linus Torvalds 已提交
1482 1483 1484
	struct in_ifaddr *ifa;
	int named = 0;

1485
	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1486
		char old[IFNAMSIZ], *dot;
L
Linus Torvalds 已提交
1487 1488

		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1489
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
L
Linus Torvalds 已提交
1490
		if (named++ == 0)
1491
			goto skip;
1492
		dot = strchr(old, ':');
1493
		if (!dot) {
1494
			sprintf(old, ":%d", named);
L
Linus Torvalds 已提交
1495 1496
			dot = old;
		}
E
Eric Dumazet 已提交
1497
		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1498
			strcat(ifa->ifa_label, dot);
E
Eric Dumazet 已提交
1499
		else
1500
			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1501 1502
skip:
		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1503 1504
	}
}
L
Linus Torvalds 已提交
1505

1506 1507 1508 1509
static void inetdev_send_gratuitous_arp(struct net_device *dev,
					struct in_device *in_dev)

{
1510
	const struct in_ifaddr *ifa;
1511

1512
	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1513 1514 1515 1516 1517
		arp_send(ARPOP_REQUEST, ETH_P_ARP,
			 ifa->ifa_local, dev,
			 ifa->ifa_local, NULL,
			 dev->dev_addr, NULL);
	}
1518 1519
}

L
Linus Torvalds 已提交
1520 1521 1522 1523 1524
/* Called only under RTNL semaphore */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
1525
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1526
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1527

L
Linus Torvalds 已提交
1528 1529 1530
	ASSERT_RTNL();

	if (!in_dev) {
1531
		if (event == NETDEV_REGISTER) {
L
Linus Torvalds 已提交
1532
			in_dev = inetdev_init(dev);
1533 1534
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
1535
			if (dev->flags & IFF_LOOPBACK) {
1536 1537
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1538
			}
B
Breno Leitao 已提交
1539 1540 1541 1542
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
L
Linus Torvalds 已提交
1543 1544 1545 1546 1547 1548
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
1549
		pr_debug("%s: bug\n", __func__);
1550
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
L
Linus Torvalds 已提交
1551 1552
		break;
	case NETDEV_UP:
B
Breno Leitao 已提交
1553
		if (!inetdev_valid_mtu(dev->mtu))
L
Linus Torvalds 已提交
1554
			break;
1555
		if (dev->flags & IFF_LOOPBACK) {
E
Eric Dumazet 已提交
1556 1557 1558
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
1559
				INIT_HLIST_NODE(&ifa->hash);
L
Linus Torvalds 已提交
1560 1561 1562 1563 1564 1565 1566 1567
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
J
Jiri Pirko 已提交
1568 1569
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
1570 1571
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
L
Linus Torvalds 已提交
1572 1573 1574 1575
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
J
Joe Perches 已提交
1576
		fallthrough;
1577
	case NETDEV_CHANGEADDR:
1578 1579
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
J
Joe Perches 已提交
1580
		fallthrough;
1581
	case NETDEV_NOTIFY_PEERS:
1582
		/* Send gratuitous ARP to notify of link change */
1583
		inetdev_send_gratuitous_arp(dev, in_dev);
L
Linus Torvalds 已提交
1584 1585 1586 1587
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
1588
	case NETDEV_PRE_TYPE_CHANGE:
1589 1590
		ip_mc_unmap(in_dev);
		break;
1591
	case NETDEV_POST_TYPE_CHANGE:
1592 1593
		ip_mc_remap(in_dev);
		break;
L
Linus Torvalds 已提交
1594
	case NETDEV_CHANGEMTU:
B
Breno Leitao 已提交
1595
		if (inetdev_valid_mtu(dev->mtu))
L
Linus Torvalds 已提交
1596
			break;
B
Breno Leitao 已提交
1597
		/* disable IP when MTU is not enough */
J
Joe Perches 已提交
1598
		fallthrough;
L
Linus Torvalds 已提交
1599 1600 1601 1602 1603 1604 1605 1606 1607
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

1608
		devinet_sysctl_unregister(in_dev);
1609
		devinet_sysctl_register(in_dev);
L
Linus Torvalds 已提交
1610 1611 1612 1613 1614 1615 1616
		break;
	}
out:
	return NOTIFY_DONE;
}

static struct notifier_block ip_netdev_notifier = {
J
Jianjun Kong 已提交
1617
	.notifier_call = inetdev_event,
L
Linus Torvalds 已提交
1618 1619
};

E
Eric Dumazet 已提交
1620
static size_t inet_nlmsg_size(void)
1621 1622 1623 1624 1625
{
	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
	       + nla_total_size(4) /* IFA_ADDRESS */
	       + nla_total_size(4) /* IFA_LOCAL */
	       + nla_total_size(4) /* IFA_BROADCAST */
1626
	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1627
	       + nla_total_size(4)  /* IFA_FLAGS */
1628
	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1629
	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1630 1631
}

J
Jiri Pirko 已提交
1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649
/*
 * Convert a jiffies timestamp into the number of 1/100-second units
 * elapsed since INITIAL_JIFFIES (assuming HZ is jiffies per second),
 * truncated to u32.
 */
static inline u32 cstamp_delta(unsigned long cstamp)
{
	unsigned long ticks = cstamp - INITIAL_JIFFIES;

	return ticks * 100UL / HZ;
}

/*
 * Append an IFA_CACHEINFO attribute to @skb.
 *
 * @cstamp and @tstamp are jiffies values, converted with cstamp_delta();
 * @preferred and @valid are stored unchanged.
 * Returns the result of nla_put().
 */
static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
			 unsigned long tstamp, u32 preferred, u32 valid)
{
	struct ifa_cacheinfo info = {
		.ifa_prefered	= preferred,
		.ifa_valid	= valid,
		.cstamp		= cstamp_delta(cstamp),
		.tstamp		= cstamp_delta(tstamp),
	};

	return nla_put(skb, IFA_CACHEINFO, sizeof(info), &info);
}

L
Linus Torvalds 已提交
1650
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
C
Christian Brauner 已提交
1651
			    struct inet_fill_args *args)
L
Linus Torvalds 已提交
1652 1653 1654
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;
J
Jiri Pirko 已提交
1655
	u32 preferred, valid;
L
Linus Torvalds 已提交
1656

C
Christian Brauner 已提交
1657 1658
	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
			args->flags);
1659
	if (!nlh)
1660
		return -EMSGSIZE;
1661 1662

	ifm = nlmsg_data(nlh);
L
Linus Torvalds 已提交
1663 1664
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
J
Jiri Pirko 已提交
1665
	ifm->ifa_flags = ifa->ifa_flags;
L
Linus Torvalds 已提交
1666 1667
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1668

C
Christian Brauner 已提交
1669 1670
	if (args->netnsid >= 0 &&
	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1671 1672
		goto nla_put_failure;

J
Jiri Pirko 已提交
1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693
	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
		preferred = ifa->ifa_preferred_lft;
		valid = ifa->ifa_valid_lft;
		if (preferred != INFINITY_LIFE_TIME) {
			long tval = (jiffies - ifa->ifa_tstamp) / HZ;

			if (preferred > tval)
				preferred -= tval;
			else
				preferred = 0;
			if (valid != INFINITY_LIFE_TIME) {
				if (valid > tval)
					valid -= tval;
				else
					valid = 0;
			}
		}
	} else {
		preferred = INFINITY_LIFE_TIME;
		valid = INFINITY_LIFE_TIME;
	}
D
David S. Miller 已提交
1694
	if ((ifa->ifa_address &&
1695
	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
D
David S. Miller 已提交
1696
	    (ifa->ifa_local &&
1697
	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
D
David S. Miller 已提交
1698
	    (ifa->ifa_broadcast &&
1699
	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
D
David S. Miller 已提交
1700
	    (ifa->ifa_label[0] &&
J
Jiri Pirko 已提交
1701
	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1702
	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1703 1704
	    (ifa->ifa_rt_priority &&
	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
J
Jiri Pirko 已提交
1705 1706
	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
			  preferred, valid))
D
David S. Miller 已提交
1707
		goto nla_put_failure;
L
Linus Torvalds 已提交
1708

1709 1710
	nlmsg_end(skb, nlh);
	return 0;
1711 1712

nla_put_failure:
1713 1714
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
L
Linus Torvalds 已提交
1715 1716
}

1717 1718 1719
static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
				      struct inet_fill_args *fillargs,
				      struct net **tgt_net, struct sock *sk,
1720
				      struct netlink_callback *cb)
1721
{
1722
	struct netlink_ext_ack *extack = cb->extack;
1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736
	struct nlattr *tb[IFA_MAX+1];
	struct ifaddrmsg *ifm;
	int err, i;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
		return -EINVAL;
	}

	ifm = nlmsg_data(nlh);
	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
		return -EINVAL;
	}
1737 1738 1739 1740 1741

	fillargs->ifindex = ifm->ifa_index;
	if (fillargs->ifindex) {
		cb->answer_flags |= NLM_F_DUMP_FILTERED;
		fillargs->flags |= NLM_F_DUMP_FILTERED;
1742 1743
	}

1744 1745
	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
					    ifa_ipv4_policy, extack);
1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759
	if (err < 0)
		return err;

	for (i = 0; i <= IFA_MAX; ++i) {
		if (!tb[i])
			continue;

		if (i == IFA_TARGET_NETNSID) {
			struct net *net;

			fillargs->netnsid = nla_get_s32(tb[i]);

			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
			if (IS_ERR(net)) {
1760
				fillargs->netnsid = -1;
1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773
				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
				return PTR_ERR(net);
			}
			*tgt_net = net;
		} else {
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
			return -EINVAL;
		}
	}

	return 0;
}

1774 1775 1776 1777 1778 1779 1780 1781
static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
			    struct netlink_callback *cb, int s_ip_idx,
			    struct inet_fill_args *fillargs)
{
	struct in_ifaddr *ifa;
	int ip_idx = 0;
	int err;

1782
	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1783 1784
		if (ip_idx < s_ip_idx) {
			ip_idx++;
1785
			continue;
1786
		}
1787 1788 1789 1790 1791
		err = inet_fill_ifaddr(skb, ifa, fillargs);
		if (err < 0)
			goto done;

		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1792
		ip_idx++;
1793 1794 1795 1796 1797 1798 1799 1800 1801
	}
	err = 0;

done:
	cb->args[2] = ip_idx;

	return err;
}

L
Linus Torvalds 已提交
1802 1803
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
1804
	const struct nlmsghdr *nlh = cb->nlh;
C
Christian Brauner 已提交
1805 1806
	struct inet_fill_args fillargs = {
		.portid = NETLINK_CB(cb->skb).portid,
1807
		.seq = nlh->nlmsg_seq,
C
Christian Brauner 已提交
1808 1809 1810 1811
		.event = RTM_NEWADDR,
		.flags = NLM_F_MULTI,
		.netnsid = -1,
	};
1812
	struct net *net = sock_net(skb->sk);
1813
	struct net *tgt_net = net;
E
Eric Dumazet 已提交
1814 1815
	int h, s_h;
	int idx, s_idx;
1816
	int s_ip_idx;
L
Linus Torvalds 已提交
1817 1818
	struct net_device *dev;
	struct in_device *in_dev;
E
Eric Dumazet 已提交
1819
	struct hlist_head *head;
1820
	int err = 0;
L
Linus Torvalds 已提交
1821

E
Eric Dumazet 已提交
1822 1823
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
1824
	s_ip_idx = cb->args[2];
E
Eric Dumazet 已提交
1825

1826 1827
	if (cb->strict_check) {
		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1828
						 skb->sk, cb);
1829
		if (err < 0)
1830
			goto put_tgt_net;
1831

1832
		err = 0;
1833 1834
		if (fillargs.ifindex) {
			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1835 1836 1837 1838
			if (!dev) {
				err = -ENODEV;
				goto put_tgt_net;
			}
1839 1840 1841 1842 1843 1844 1845 1846

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev) {
				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
						       &fillargs);
			}
			goto put_tgt_net;
		}
1847 1848
	}

E
Eric Dumazet 已提交
1849 1850
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
1851
		head = &tgt_net->dev_index_head[h];
E
Eric Dumazet 已提交
1852
		rcu_read_lock();
1853 1854
		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
			  tgt_net->dev_base_seq;
1855
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
E
Eric Dumazet 已提交
1856 1857
			if (idx < s_idx)
				goto cont;
1858
			if (h > s_h || idx > s_idx)
E
Eric Dumazet 已提交
1859 1860 1861 1862
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;
L
Linus Torvalds 已提交
1863

1864 1865 1866 1867 1868
			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
					       &fillargs);
			if (err < 0) {
				rcu_read_unlock();
				goto done;
E
Eric Dumazet 已提交
1869
			}
1870
cont:
E
Eric Dumazet 已提交
1871 1872 1873
			idx++;
		}
		rcu_read_unlock();
L
Linus Torvalds 已提交
1874 1875 1876
	}

done:
E
Eric Dumazet 已提交
1877 1878
	cb->args[0] = h;
	cb->args[1] = idx;
1879
put_tgt_net:
C
Christian Brauner 已提交
1880
	if (fillargs.netnsid >= 0)
1881
		put_net(tgt_net);
L
Linus Torvalds 已提交
1882

1883
	return skb->len ? : err;
L
Linus Torvalds 已提交
1884 1885
}

J
Jianjun Kong 已提交
1886
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1887
		      u32 portid)
L
Linus Torvalds 已提交
1888
{
C
Christian Brauner 已提交
1889 1890 1891 1892 1893 1894 1895
	struct inet_fill_args fillargs = {
		.portid = portid,
		.seq = nlh ? nlh->nlmsg_seq : 0,
		.event = event,
		.flags = 0,
		.netnsid = -1,
	};
1896
	struct sk_buff *skb;
1897
	int err = -ENOBUFS;
1898
	struct net *net;
L
Linus Torvalds 已提交
1899

1900
	net = dev_net(ifa->ifa_dev->dev);
1901
	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1902
	if (!skb)
1903 1904
		goto errout;

C
Christian Brauner 已提交
1905
	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1906 1907 1908 1909 1910 1911
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
1912
	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1913
	return;
1914 1915
errout:
	if (err < 0)
1916
		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
L
Linus Torvalds 已提交
1917 1918
}

1919 1920
/*
 * Space needed for the IFLA_INET_CONF attribute of @dev: one 4-byte value
 * per devconf entry, or 0 if the device has no in_device.
 * @ext_filter_mask is unused here.
 */
static size_t inet_get_link_af_size(const struct net_device *dev,
				    u32 ext_filter_mask)
{
	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);

	if (!in_dev)
		return 0;

	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
}

1930 1931
/*
 * Emit an IFLA_INET_CONF attribute holding every in_dev->cnf.data[] value
 * of @dev as a flat array of u32.
 *
 * Returns 0 on success, -ENODATA if the device has no in_device, or
 * -EMSGSIZE if the attribute does not fit in @skb.
 * @ext_filter_mask is unused here.
 */
static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
			     u32 ext_filter_mask)
{
	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	struct nlattr *nla;
	int i;

	if (!in_dev)
		return -ENODATA;

	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
	if (!nla)
		return -EMSGSIZE;

	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];

	return 0;
}

/* Policy for IFLA_INET_* attributes: IFLA_INET_CONF must be nested. */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX + 1] = {
	[IFLA_INET_CONF] = {
		.type = NLA_NESTED,
	},
};

1954
static int inet_validate_link_af(const struct net_device *dev,
R
Rocco Yue 已提交
1955 1956
				 const struct nlattr *nla,
				 struct netlink_ext_ack *extack)
T
Thomas Graf 已提交
1957 1958 1959 1960
{
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int err, rem;

1961
	if (dev && !__in_dev_get_rtnl(dev))
1962
		return -EAFNOSUPPORT;
T
Thomas Graf 已提交
1963

1964
	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
R
Rocco Yue 已提交
1965
					  inet_af_policy, extack);
T
Thomas Graf 已提交
1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980
	if (err < 0)
		return err;

	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
			int cfgid = nla_type(a);

			if (nla_len(a) < 4)
				return -EINVAL;

			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
				return -EINVAL;
		}
	}

1981 1982 1983
	return 0;
}

1984 1985
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
			    struct netlink_ext_ack *extack)
1986
{
1987
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1988 1989 1990 1991 1992 1993
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int rem;

	if (!in_dev)
		return -EAFNOSUPPORT;

1994
	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1995
		return -EINVAL;
1996

T
Thomas Graf 已提交
1997 1998 1999 2000 2001 2002 2003 2004
	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
	}

	return 0;
}

2005 2006 2007 2008
static int inet_netconf_msgsize_devconf(int type)
{
	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2009
	bool all = false;
2010

2011 2012 2013 2014
	if (type == NETCONFA_ALL)
		all = true;

	if (all || type == NETCONFA_FORWARDING)
2015
		size += nla_total_size(4);
2016
	if (all || type == NETCONFA_RP_FILTER)
2017
		size += nla_total_size(4);
2018
	if (all || type == NETCONFA_MC_FORWARDING)
2019
		size += nla_total_size(4);
2020 2021
	if (all || type == NETCONFA_BC_FORWARDING)
		size += nla_total_size(4);
2022
	if (all || type == NETCONFA_PROXY_NEIGH)
S
stephen hemminger 已提交
2023
		size += nla_total_size(4);
2024
	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2025
		size += nla_total_size(4);
2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036

	return size;
}

/*
 * Append one netconf message to @skb.
 *
 * The message carries an AF_INET netconfmsg header and NETCONFA_IFINDEX.
 * When @devconf is non-NULL it also carries the option selected by @type,
 * or every supported option when @type is NETCONFA_ALL. A NULL @devconf
 * yields a message with only the ifindex.
 *
 * Returns 0 on success or -EMSGSIZE when @skb has no room; in the latter
 * case a partially built message is cancelled.
 */
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
				     struct ipv4_devconf *devconf, u32 portid,
				     u32 seq, int event, unsigned int flags,
				     int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_INET;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
		goto nla_put_failure;

	if (!devconf)
		goto out;

	if ((all || type == NETCONFA_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_FORWARDING,
			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_RP_FILTER) &&
	    nla_put_s32(skb, NETCONFA_RP_FILTER,
			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_MC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_BC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
		goto nla_put_failure;
	/* NETCONFA_PROXY_NEIGH reports the PROXY_ARP option. */
	if ((all || type == NETCONFA_PROXY_NEIGH) &&
	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
		goto nla_put_failure;

out:
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

2090 2091
void inet_netconf_notify_devconf(struct net *net, int event, int type,
				 int ifindex, struct ipv4_devconf *devconf)
2092 2093 2094 2095
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

2096
	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2097
	if (!skb)
2098 2099 2100
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2101
					event, 0, type);
2102 2103 2104 2105 2106 2107
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
2108
	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2109 2110 2111 2112 2113 2114
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
}

2115 2116 2117
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2118
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2119
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2120
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2121 2122
};

2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135
static int inet_netconf_valid_get_req(struct sk_buff *skb,
				      const struct nlmsghdr *nlh,
				      struct nlattr **tb,
				      struct netlink_ext_ack *extack)
{
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
2136 2137 2138
		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
					      tb, NETCONFA_MAX,
					      devconf_ipv4_policy, extack);
2139

2140 2141 2142
	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
					    tb, NETCONFA_MAX,
					    devconf_ipv4_policy, extack);
2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161
	if (err)
		return err;

	for (i = 0; i <= NETCONFA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case NETCONFA_IFINDEX:
			break;
		default:
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
			return -EINVAL;
		}
	}

	return 0;
}

2162
static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2163 2164
				    struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
2165 2166 2167 2168 2169 2170 2171 2172 2173 2174
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[NETCONFA_MAX+1];
	struct sk_buff *skb;
	struct ipv4_devconf *devconf;
	struct in_device *in_dev;
	struct net_device *dev;
	int ifindex;
	int err;

2175 2176
	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
	if (err)
2177 2178
		goto errout;

2179
	err = -EINVAL;
2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192
	if (!tb[NETCONFA_IFINDEX])
		goto errout;

	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
	switch (ifindex) {
	case NETCONFA_IFINDEX_ALL:
		devconf = net->ipv4.devconf_all;
		break;
	case NETCONFA_IFINDEX_DEFAULT:
		devconf = net->ipv4.devconf_dflt;
		break;
	default:
		dev = __dev_get_by_index(net, ifindex);
2193
		if (!dev)
2194 2195
			goto errout;
		in_dev = __in_dev_get_rtnl(dev);
2196
		if (!in_dev)
2197 2198 2199 2200 2201 2202
			goto errout;
		devconf = &in_dev->cnf;
		break;
	}

	err = -ENOBUFS;
2203
	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2204
	if (!skb)
2205 2206 2207 2208 2209
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
					NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2210
					NETCONFA_ALL);
2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}

2222 2223 2224
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
2225
	const struct nlmsghdr *nlh = cb->nlh;
2226 2227 2228 2229 2230 2231 2232
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247
	if (cb->strict_check) {
		struct netlink_ext_ack *extack = cb->extack;
		struct netconfmsg *ncm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
			return -EINVAL;
		}

		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
			return -EINVAL;
		}
	}

2248 2249 2250 2251 2252 2253 2254
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
2255 2256
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
2257 2258 2259 2260 2261 2262 2263 2264 2265 2266
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
2267
						      nlh->nlmsg_seq,
2268 2269
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
2270
						      NETCONFA_ALL) < 0) {
2271 2272 2273
				rcu_read_unlock();
				goto done;
			}
2274
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2275 2276 2277 2278 2279 2280 2281 2282 2283
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
2284
					      nlh->nlmsg_seq,
2285
					      RTM_NEWNETCONF, NLM_F_MULTI,
2286
					      NETCONFA_ALL) < 0)
2287 2288 2289 2290 2291 2292 2293 2294
			goto done;
		else
			h++;
	}
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
2295
					      nlh->nlmsg_seq,
2296
					      RTM_NEWNETCONF, NLM_F_MULTI,
2297
					      NETCONFA_ALL) < 0)
2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308
			goto done;
		else
			h++;
	}
done:
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}

L
Linus Torvalds 已提交
2309 2310
#ifdef CONFIG_SYSCTL

2311
static void devinet_copy_dflt_conf(struct net *net, int i)
2312 2313 2314
{
	struct net_device *dev;

2315 2316
	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
2317
		struct in_device *in_dev;
2318

2319 2320
		in_dev = __in_dev_get_rcu(dev);
		if (in_dev && !test_bit(i, in_dev->cnf.state))
2321
			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2322
	}
2323
	rcu_read_unlock();
2324 2325
}

2326
/* called with RTNL locked */
2327
static void inet_forward_change(struct net *net)
2328 2329
{
	struct net_device *dev;
2330
	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2331

2332
	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2333
	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2334 2335
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
				    NETCONFA_FORWARDING,
2336 2337
				    NETCONFA_IFINDEX_ALL,
				    net->ipv4.devconf_all);
2338 2339
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
				    NETCONFA_FORWARDING,
2340 2341
				    NETCONFA_IFINDEX_DEFAULT,
				    net->ipv4.devconf_dflt);
2342

2343
	for_each_netdev(net, dev) {
2344
		struct in_device *in_dev;
2345

2346 2347
		if (on)
			dev_disable_lro(dev);
2348 2349

		in_dev = __in_dev_get_rtnl(dev);
2350
		if (in_dev) {
2351
			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2352 2353
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
2354 2355
						    dev->ifindex, &in_dev->cnf);
		}
2356 2357 2358
	}
}

S
stephen hemminger 已提交
2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371
/*
 * Translate a devconf block into the ifindex reported in netconf messages:
 * the namespace's "default" and "all" blocks map to their NETCONFA_IFINDEX_*
 * constants, anything else is treated as the cnf member of an in_device and
 * maps to that device's ifindex.
 */
static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
{
	struct in_device *owner;

	if (cnf == net->ipv4.devconf_dflt)
		return NETCONFA_IFINDEX_DEFAULT;

	if (cnf == net->ipv4.devconf_all)
		return NETCONFA_IFINDEX_ALL;

	owner = container_of(cnf, struct in_device, cnf);
	return owner->dev->ifindex;
}

2372
/*
 * Generic sysctl handler for per-devconf integer options.
 *
 * The value is read or written with proc_dointvec(). On a write the
 * option's state bit is set in the owning devconf (ctl->extra1), and then:
 *  - a write to the "default" block is copied to devices that have not
 *    set the option themselves;
 *  - the route cache is flushed when ACCEPT_LOCAL or ROUTE_LOCALNET goes
 *    from non-zero to zero, or when BC_FORWARDING changes;
 *  - a netconf notification is sent when RP_FILTER, PROXY_ARP or
 *    IGNORE_ROUTES_WITH_LINKDOWN changes.
 *
 * Returns the result of proc_dointvec().
 */
static int devinet_conf_proc(struct ctl_table *ctl, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	int old_value = *(int *)ctl->data;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	int new_value = *(int *)ctl->data;

	if (write) {
		struct ipv4_devconf *cnf = ctl->extra1;
		struct net *net = ctl->extra2;
		/* Index into cnf->data[]; equals IPV4_DEVCONF_<opt> - 1. */
		int i = (int *)ctl->data - cnf->data;
		int ifindex;

		set_bit(i, cnf->state);

		if (cnf == net->ipv4.devconf_dflt)
			devinet_copy_dflt_conf(net, i);
		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
			if ((new_value == 0) && (old_value != 0))
				rt_cache_flush(net);

		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
		    new_value != old_value)
			rt_cache_flush(net);

		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_RP_FILTER,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_PROXY_NEIGH,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
						    ifindex, cnf);
		}
	}

	return ret;
}

2424
/*
 * Sysctl handler for the "forwarding" options.
 *
 * Writes require CAP_NET_ADMIN in the namespace's user namespace
 * (-EPERM otherwise). When a write changes the value:
 *  - for the "default" entry only a netconf notification is sent;
 *  - for the "all" entry the change is propagated by
 *    inet_forward_change();
 *  - for a device entry LRO is disabled if forwarding was turned on and a
 *    notification is sent for that device.
 * The last two cases run under RTNL and are followed by a route cache
 * flush. If RTNL cannot be taken immediately, the old value and file
 * position are restored and the syscall is restarted.
 *
 * Returns the result of proc_dointvec() unless one of the above applies.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	loff_t pos = *ppos;
	struct net *net = ctl->extra2;
	int ret;

	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				inet_forward_change(net);
			} else {
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}

2471
/*
 * Sysctl handler for integer options whose change invalidates routes:
 * performs proc_dointvec() and flushes the route cache of the namespace
 * in ctl->extra2 when a write changed the value.
 *
 * Returns the result of proc_dointvec().
 */
static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	struct net *net = ctl->extra2;

	if (write && *valp != val)
		rt_cache_flush(net);

	return ret;
}

2485
/*
 * Build one ctl_table entry for devconf option @attr.
 *
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of the global
 * ipv4_devconf.data[] and .extra1 at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write (0644) entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2507

L
Linus Torvalds 已提交
2508 2509
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
2510
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
L
Linus Torvalds 已提交
2511 2512
} devinet_sysctl = {
	.devinet_vars = {
2513
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2514
					     devinet_sysctl_forward),
2515
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2516
		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2517 2518 2519 2520 2521 2522 2523 2524

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
2525
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2526
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2527 2528 2529 2530 2531 2532 2533 2534 2535
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2536
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2537 2538
		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
					"arp_evict_nocarrier"),
2539
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2540 2541
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
2542 2543 2544 2545
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
2546 2547
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
2548 2549
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),
2550 2551 2552 2553 2554

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
2555 2556
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
2557 2558
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
L
Linus Torvalds 已提交
2559 2560 2561
	},
};

2562
static int __devinet_sysctl_register(struct net *net, char *dev_name,
2563
				     int ifindex, struct ipv4_devconf *p)
L
Linus Torvalds 已提交
2564 2565
{
	int i;
2566
	struct devinet_sysctl_table *t;
2567
	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2568

2569
	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
L
Linus Torvalds 已提交
2570
	if (!t)
2571 2572
		goto out;

L
Linus Torvalds 已提交
2573 2574
	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2575
		t->devinet_vars[i].extra1 = p;
2576
		t->devinet_vars[i].extra2 = net;
L
Linus Torvalds 已提交
2577 2578
	}

2579
	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
L
Linus Torvalds 已提交
2580

2581
	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
L
Linus Torvalds 已提交
2582
	if (!t->sysctl_header)
2583
		goto free;
L
Linus Torvalds 已提交
2584 2585

	p->sysctl = t;
2586

2587 2588
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
				    ifindex, p);
2589
	return 0;
L
Linus Torvalds 已提交
2590

2591
free:
L
Linus Torvalds 已提交
2592
	kfree(t);
2593
out:
L
liuguoqiang 已提交
2594
	return -ENOMEM;
L
Linus Torvalds 已提交
2595 2596
}

2597 2598
static void __devinet_sysctl_unregister(struct net *net,
					struct ipv4_devconf *cnf, int ifindex)
2599 2600 2601
{
	struct devinet_sysctl_table *t = cnf->sysctl;

2602 2603 2604 2605 2606
	if (t) {
		cnf->sysctl = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
2607

2608
	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2609 2610
}

2611
static int devinet_sysctl_register(struct in_device *idev)
2612
{
2613 2614 2615 2616 2617 2618 2619 2620 2621
	int err;

	if (!sysctl_dev_name_is_allowed(idev->dev->name))
		return -EINVAL;

	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
	if (err)
		return err;
	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2622
					idev->dev->ifindex, &idev->cnf);
2623 2624 2625
	if (err)
		neigh_sysctl_unregister(idev->arp_parms);
	return err;
2626 2627
}

2628
static void devinet_sysctl_unregister(struct in_device *idev)
L
Linus Torvalds 已提交
2629
{
2630 2631 2632
	struct net *net = dev_net(idev->dev);

	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2633
	neigh_sysctl_unregister(idev->arp_parms);
L
Linus Torvalds 已提交
2634 2635
}

2636 2637 2638 2639
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
2640
					IPV4_DEVCONF_FORWARDING - 1],
2641 2642 2643 2644
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
2645
		.extra2		= &init_net,
2646 2647 2648
	},
	{ },
};
2649
#endif
2650

2651 2652 2653 2654
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
2655
#ifdef CONFIG_SYSCTL
2656
	struct ctl_table *tbl;
2657
	struct ctl_table_header *forw_hdr;
2658
#endif
2659 2660

	err = -ENOMEM;
2661 2662 2663
	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
	if (!all)
		goto err_alloc_all;
2664

2665 2666 2667
	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
	if (!dflt)
		goto err_alloc_dflt;
2668

2669
#ifdef CONFIG_SYSCTL
2670 2671 2672
	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
	if (!tbl)
		goto err_alloc_ctl;
2673

2674 2675 2676
	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
	tbl[0].extra1 = all;
	tbl[0].extra2 = net;
2677
#endif
2678

2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696
	if (!net_eq(net, &init_net)) {
		if (IS_ENABLED(CONFIG_SYSCTL) &&
		    sysctl_devconf_inherit_init_net == 3) {
			/* copy from the current netns */
			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt,
			       current->nsproxy->net_ns->ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
		} else if (!IS_ENABLED(CONFIG_SYSCTL) ||
			   sysctl_devconf_inherit_init_net != 2) {
			/* inherit == 0 or 1: copy from init_net */
			memcpy(all, init_net.ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt, init_net.ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
		}
		/* else inherit == 2: use compiled values */
2697 2698 2699
	}

#ifdef CONFIG_SYSCTL
2700
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2701 2702 2703
	if (err < 0)
		goto err_reg_all;

2704 2705
	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
2706 2707 2708 2709
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
2710
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2711
	if (!forw_hdr)
2712
		goto err_reg_ctl;
2713
	net->ipv4.forw_hdr = forw_hdr;
2714 2715 2716 2717 2718 2719 2720 2721
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

#ifdef CONFIG_SYSCTL
err_reg_ctl:
2722
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2723
err_reg_dflt:
2724
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2725
err_reg_all:
2726
	kfree(tbl);
2727
err_alloc_ctl:
2728
#endif
2729
	kfree(dflt);
2730
err_alloc_dflt:
2731
	kfree(all);
2732 2733 2734 2735 2736 2737
err_alloc_all:
	return err;
}

/*
 * Per-namespace teardown: with CONFIG_SYSCTL, unregister the ip_forward
 * entry and the "default" and "all" conf directories and free the
 * ip_forward table; then free both devconf blocks.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	/* Fetch the table pointer before the header is unregistered. */
	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
				    NETCONFA_IFINDEX_DEFAULT);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
				    NETCONFA_IFINDEX_ALL);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
}

/* Per-network-namespace setup and teardown hooks for IPv4 devconf. */
static __net_initdata struct pernet_operations devinet_ops = {
	.init	= devinet_init_net,
	.exit	= devinet_exit_net,
};

2758
/* AF_INET callbacks for per-address-family link attributes. */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};

L
Linus Torvalds 已提交
2766 2767
/*
 * Boot-time initialisation of the IPv4 device layer: initialise the
 * address hash heads, register the per-namespace operations and the
 * netdevice notifier, queue the address lifetime work, and register the
 * AF_INET link-attribute ops and the rtnetlink handlers for addresses and
 * netconf.
 */
void __init devinet_init(void)
{
	int i;

	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet_addr_lst[i]);

	register_pernet_subsys(&devinet_ops);
	register_netdevice_notifier(&ip_netdev_notifier);

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	rtnl_af_register(&inet_af_ops);

	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
		      inet_netconf_dump_devconf, 0);
}