br_if.c 12.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 *	Userspace interface
 *	Linux ethernet bridge
 *
 *	Authors:
 *	Lennert Buytenhek		<buytenh@gnu.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/netdevice.h>
16
#include <linux/etherdevice.h>
W
WANG Cong 已提交
17
#include <linux/netpoll.h>
L
Linus Torvalds 已提交
18 19 20 21 22
#include <linux/ethtool.h>
#include <linux/if_arp.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/rtnetlink.h>
23
#include <linux/if_ether.h>
24
#include <linux/slab.h>
L
Linus Torvalds 已提交
25
#include <net/sock.h>
26
#include <linux/if_vlan.h>
L
Linus Torvalds 已提交
27 28 29 30 31 32 33

#include "br_private.h"

/*
 * Determine initial path cost based on speed.
 * using recommendations from 802.1d standard
 *
34
 * Since driver might sleep need to not be holding any locks.
L
Linus Torvalds 已提交
35
 */
36
static int port_cost(struct net_device *dev)
L
Linus Torvalds 已提交
37
{
J
Jiri Pirko 已提交
38
	struct ethtool_cmd ecmd;
39

40
	if (!__ethtool_get_settings(dev, &ecmd)) {
J
Jiri Pirko 已提交
41 42 43 44 45 46 47 48 49
		switch (ethtool_cmd_speed(&ecmd)) {
		case SPEED_10000:
			return 2;
		case SPEED_1000:
			return 4;
		case SPEED_100:
			return 19;
		case SPEED_10:
			return 100;
L
Linus Torvalds 已提交
50 51 52 53 54 55 56 57 58 59 60 61 62
		}
	}

	/* Old silly heuristics based on name */
	if (!strncmp(dev->name, "lec", 3))
		return 7;

	if (!strncmp(dev->name, "plip", 4))
		return 2500;

	return 100;	/* assume old 10Mbps */
}

63

T
tanxiaojun 已提交
64
/* Check for port carrier transitions. */
65
void br_port_carrier_check(struct net_bridge_port *p)
66
{
67 68
	struct net_device *dev = p->dev;
	struct net_bridge *br = p->br;
S
Stephen Hemminger 已提交
69

70 71
	if (!(p->flags & BR_ADMIN_COST) &&
	    netif_running(dev) && netif_oper_up(dev))
S
Stephen Hemminger 已提交
72 73
		p->path_cost = port_cost(dev);

74 75 76 77
	if (!netif_running(br->dev))
		return;

	spin_lock_bh(&br->lock);
78
	if (netif_running(dev) && netif_oper_up(dev)) {
79 80 81 82 83
		if (p->state == BR_STATE_DISABLED)
			br_stp_enable_port(p);
	} else {
		if (p->state != BR_STATE_DISABLED)
			br_stp_disable_port(p);
84
	}
85
	spin_unlock_bh(&br->lock);
86 87
}

88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
/*
 * Put a port into promiscuous mode if it is not already marked as such.
 * On success the static fdb entries are unsynced from the device and the
 * port is flagged BR_PROMISC; if the device refuses, nothing changes.
 */
static void br_port_set_promisc(struct net_bridge_port *p)
{
	if (br_promisc_port(p))
		return;

	if (dev_set_promiscuity(p->dev, 1))
		return;

	br_fdb_unsync_static(p->br, p);
	p->flags |= BR_PROMISC;
}

/*
 * Take a port out of promiscuous mode, provided its static fdb entries
 * can first be synced to the device.
 */
static void br_port_clear_promisc(struct net_bridge_port *p)
{
	/* Already non-promisc: nothing to undo. */
	if (!br_promisc_port(p))
		return;

	/* Without unicast filtering support we'd end up re-enabling
	 * promisc mode anyway, so leave the port as it is.
	 */
	if (!(p->dev->priv_flags & IFF_UNICAST_FLT))
		return;

	/* Program the port before clearing promisc mode so that there is
	 * no interruption in traffic; give up if that fails.
	 */
	if (br_fdb_sync_static(p->br, p))
		return;

	dev_set_promiscuity(p->dev, -1);
	p->flags &= ~BR_PROMISC;
}

/* When a port is added or removed or when certain port flags
 * change, this function is called to automatically manage
 * promiscuity setting of all the bridge ports.  We are always called
 * under RTNL so can skip using rcu primitives.
 */
void br_manage_promisc(struct net_bridge *br)
{
	struct net_bridge_port *p;
	bool set_all = false;

	/* If vlan filtering is disabled or bridge interface is placed
	 * into promiscuous mode, place all ports in promiscuous mode.
	 */
	if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br))
		set_all = true;

	list_for_each_entry(p, &br->port_list, list) {
		if (set_all) {
			br_port_set_promisc(p);
		} else {
			/* If the number of auto-ports is <= 1, then all other
			 * ports will have their output configuration
			 * statically specified through fdbs.  Since ingress
			 * on the auto-port becomes forwarding/egress to other
			 * ports and egress configuration is statically known,
			 * we can say that ingress configuration of the
			 * auto-port is also statically known.
			 * This lets us disable promiscuous mode and write
			 * this config to hw.
			 *
			 * br_auto_port() is only used as a truth value here;
			 * comparing auto_cnt against its raw result would be
			 * wrong if it yields a flag mask rather than 0/1
			 * (NOTE(review): confirm its definition in
			 * br_private.h).  This form is correct either way.
			 */
			if (br->auto_cnt == 0 ||
			    (br->auto_cnt == 1 && br_auto_port(p)))
				br_port_clear_promisc(p);
			else
				br_port_set_promisc(p);
		}
	}
}

164 165 166 167 168 169 170 171 172
static void nbp_update_port_count(struct net_bridge *br)
{
	struct net_bridge_port *p;
	u32 cnt = 0;

	list_for_each_entry(p, &br->port_list, list) {
		if (br_auto_port(p))
			cnt++;
	}
173 174 175 176 177 178 179 180
	if (br->auto_cnt != cnt) {
		br->auto_cnt = cnt;
		br_manage_promisc(br);
	}
}

/*
 * Undo the receive-mode state held for a port that is being deleted:
 * drop the allmulti reference, then either drop promiscuity (promisc
 * port) or remove the synced static addresses (static port).
 */
static void nbp_delete_promisc(struct net_bridge_port *p)
{
	dev_set_allmulti(p->dev, -1);

	if (!br_promisc_port(p)) {
		/* Static port: remove all addresses from it. */
		br_fdb_unsync_static(p->br, p);
		return;
	}

	/* Port is currently promiscuous: unset promiscuity. */
	dev_set_promiscuity(p->dev, -1);
}

192 193 194 195 196 197 198 199 200 201 202 203 204 205
/* kobject release callback: free the bridge port embedding this kobject. */
static void release_nbp(struct kobject *kobj)
{
	kfree(container_of(kobj, struct net_bridge_port, kobj));
}

/* kobject type of a bridge port; sysfs operations only with CONFIG_SYSFS. */
static struct kobj_type brport_ktype = {
	.release = release_nbp,
#ifdef CONFIG_SYSFS
	.sysfs_ops = &brport_sysfs_ops,
#endif
};

L
Linus Torvalds 已提交
206 207 208 209 210 211 212 213
/*
 * Final teardown of a port: clear its bridge/device pointers, drop the
 * device reference and put the port's kobject.
 */
static void destroy_nbp(struct net_bridge_port *p)
{
	struct net_device *port_dev = p->dev;

	p->dev = NULL;
	p->br = NULL;
	dev_put(port_dev);

	kobject_put(&p->kobj);
}

/* RCU callback: destroy the port that embeds this rcu_head. */
static void destroy_nbp_rcu(struct rcu_head *head)
{
	destroy_nbp(container_of(head, struct net_bridge_port, rcu));
}

224 225 226 227 228 229 230 231 232
/* Delete port(interface) from bridge is done in two steps.
 * via RCU. First step, marks device as down. That deletes
 * all the timers and stops new packets from flowing through.
 *
 * Final cleanup doesn't occur until after all CPU's finished
 * processing packets.
 *
 * Protected from multiple admin operations by RTNL mutex
 */
L
Linus Torvalds 已提交
233 234 235 236 237
static void del_nbp(struct net_bridge_port *p)
{
	struct net_bridge *br = p->br;
	struct net_device *dev = p->dev;

238
	sysfs_remove_link(br->ifobj, p->dev->name);
239

240
	nbp_delete_promisc(p);
L
Linus Torvalds 已提交
241 242 243 244 245

	spin_lock_bh(&br->lock);
	br_stp_disable_port(p);
	spin_unlock_bh(&br->lock);

246 247
	br_ifinfo_notify(RTM_DELLINK, p);

L
Linus Torvalds 已提交
248 249
	list_del_rcu(&p->list);

250 251
	nbp_vlan_flush(p);
	br_fdb_delete_by_port(br, p, 1);
252 253
	nbp_update_port_count(br);

254 255
	dev->priv_flags &= ~IFF_BRIDGE_PORT;

256
	netdev_rx_handler_unregister(dev);
257

258
	netdev_upper_dev_unlink(dev, br->dev);
259

260 261
	br_multicast_del_port(p);

262
	kobject_uevent(&p->kobj, KOBJ_REMOVE);
263 264
	kobject_del(&p->kobj);

H
Herbert Xu 已提交
265 266
	br_netpoll_disable(p);

L
Linus Torvalds 已提交
267 268 269
	call_rcu(&p->rcu, destroy_nbp_rcu);
}

270 271
/* Delete bridge device */
void br_dev_delete(struct net_device *dev, struct list_head *head)
L
Linus Torvalds 已提交
272
{
273
	struct net_bridge *br = netdev_priv(dev);
L
Linus Torvalds 已提交
274 275 276 277 278 279
	struct net_bridge_port *p, *n;

	list_for_each_entry_safe(p, n, &br->port_list, list) {
		del_nbp(p);
	}

280 281
	br_fdb_delete_by_port(br, NULL, 1);

282
	br_vlan_flush(br);
L
Linus Torvalds 已提交
283 284 285
	del_timer_sync(&br->gc_timer);

	br_sysfs_delbr(br->dev);
286
	unregister_netdevice_queue(br->dev, head);
L
Linus Torvalds 已提交
287 288 289 290 291 292 293 294 295
}

/* find an available port number */
static int find_portno(struct net_bridge *br)
{
	int index;
	struct net_bridge_port *p;
	unsigned long *inuse;

S
Stephen Hemminger 已提交
296
	inuse = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
L
Linus Torvalds 已提交
297 298 299 300 301 302 303 304 305 306 307 308 309 310
			GFP_KERNEL);
	if (!inuse)
		return -ENOMEM;

	set_bit(0, inuse);	/* zero is reserved */
	list_for_each_entry(p, &br->port_list, list) {
		set_bit(p->port_no, inuse);
	}
	index = find_first_zero_bit(inuse, BR_MAX_PORTS);
	kfree(inuse);

	return (index >= BR_MAX_PORTS) ? -EXFULL : index;
}

311
/* called with RTNL but without bridge lock */
312
static struct net_bridge_port *new_nbp(struct net_bridge *br,
313
				       struct net_device *dev)
L
Linus Torvalds 已提交
314 315 316
{
	int index;
	struct net_bridge_port *p;
317

L
Linus Torvalds 已提交
318 319 320 321
	index = find_portno(br);
	if (index < 0)
		return ERR_PTR(index);

S
Stephen Hemminger 已提交
322
	p = kzalloc(sizeof(*p), GFP_KERNEL);
L
Linus Torvalds 已提交
323 324 325 326 327 328
	if (p == NULL)
		return ERR_PTR(-ENOMEM);

	p->br = br;
	dev_hold(dev);
	p->dev = dev;
329
	p->path_cost = port_cost(dev);
330
	p->priority = 0x8000 >> BR_PORT_BITS;
L
Linus Torvalds 已提交
331
	p->port_no = index;
332
	p->flags = BR_LEARNING | BR_FLOOD;
L
Linus Torvalds 已提交
333 334
	br_init_port(p);
	p->state = BR_STATE_DISABLED;
335
	br_stp_port_timer_init(p);
336
	br_multicast_add_port(p);
L
Linus Torvalds 已提交
337 338 339 340

	return p;
}

341
int br_add_bridge(struct net *net, const char *name)
L
Linus Torvalds 已提交
342 343
{
	struct net_device *dev;
344
	int res;
L
Linus Torvalds 已提交
345

346 347 348
	dev = alloc_netdev(sizeof(struct net_bridge), name,
			   br_dev_setup);

349
	if (!dev)
L
Linus Torvalds 已提交
350 351
		return -ENOMEM;

352
	dev_net_set(dev, net);
353
	dev->rtnl_link_ops = &br_link_ops;
354

355 356 357 358
	res = register_netdev(dev);
	if (res)
		free_netdev(dev);
	return res;
L
Linus Torvalds 已提交
359 360
}

361
int br_del_bridge(struct net *net, const char *name)
L
Linus Torvalds 已提交
362 363 364 365 366
{
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();
367
	dev = __dev_get_by_name(net, name);
368
	if (dev == NULL)
L
Linus Torvalds 已提交
369 370 371 372 373 374 375 376 377 378
		ret =  -ENXIO; 	/* Could not find device */

	else if (!(dev->priv_flags & IFF_EBRIDGE)) {
		/* Attempt to delete non bridge device! */
		ret = -EPERM;
	}

	else if (dev->flags & IFF_UP) {
		/* Not shutdown yet. */
		ret = -EBUSY;
379
	}
L
Linus Torvalds 已提交
380

381
	else
382
		br_dev_delete(dev, NULL);
L
Linus Torvalds 已提交
383 384 385 386 387

	rtnl_unlock();
	return ret;
}

388
/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
L
Linus Torvalds 已提交
389 390 391 392 393 394 395 396
int br_min_mtu(const struct net_bridge *br)
{
	const struct net_bridge_port *p;
	int mtu = 0;

	ASSERT_RTNL();

	if (list_empty(&br->port_list))
397
		mtu = ETH_DATA_LEN;
L
Linus Torvalds 已提交
398 399 400 401 402 403 404 405 406
	else {
		list_for_each_entry(p, &br->port_list, list) {
			if (!mtu  || p->dev->mtu < mtu)
				mtu = p->dev->mtu;
		}
	}
	return mtu;
}

407 408 409
/*
 * Recomputes features using slave's features
 */
410 411
netdev_features_t br_features_recompute(struct net_bridge *br,
	netdev_features_t features)
412 413
{
	struct net_bridge_port *p;
414
	netdev_features_t mask;
415

416
	if (list_empty(&br->port_list))
417
		return features;
418

419
	mask = features;
420
	features &= ~NETIF_F_ONE_FOR_ALL;
421 422

	list_for_each_entry(p, &br->port_list, list) {
423 424
		features = netdev_increment_features(features,
						     p->dev->features, mask);
425 426
	}

427
	return features;
428 429
}

L
Linus Torvalds 已提交
430 431 432 433 434
/* called with RTNL */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *p;
	int err = 0;
435
	bool changed_addr;
L
Linus Torvalds 已提交
436

437 438
	/* Don't allow bridging non-ethernet like devices */
	if ((dev->flags & IFF_LOOPBACK) ||
439 440
	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
	    !is_valid_ether_addr(dev->dev_addr))
L
Linus Torvalds 已提交
441 442
		return -EINVAL;

443
	/* No bridging of bridges */
444
	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
L
Linus Torvalds 已提交
445 446
		return -ELOOP;

447
	/* Device is already being bridged */
448
	if (br_port_exists(dev))
L
Linus Torvalds 已提交
449 450
		return -EBUSY;

451 452 453 454
	/* No bridging devices that dislike that (e.g. wireless) */
	if (dev->priv_flags & IFF_DONT_BRIDGE)
		return -EOPNOTSUPP;

455 456
	p = new_nbp(br, dev);
	if (IS_ERR(p))
L
Linus Torvalds 已提交
457 458
		return PTR_ERR(p);

459 460
	call_netdevice_notifiers(NETDEV_JOIN, dev);

461
	err = dev_set_allmulti(dev, 1);
462 463 464
	if (err)
		goto put_back;

465 466
	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
				   SYSFS_BRIDGE_PORT_ATTR);
467 468
	if (err)
		goto err1;
L
Linus Torvalds 已提交
469

470 471 472
	err = br_sysfs_addif(p);
	if (err)
		goto err2;
L
Linus Torvalds 已提交
473

474
	err = br_netpoll_enable(p);
S
stephen hemminger 已提交
475
	if (err)
H
Herbert Xu 已提交
476 477
		goto err3;

478
	err = netdev_master_upper_dev_link(dev, br->dev);
479
	if (err)
480
		goto err4;
481

482 483
	err = netdev_rx_handler_register(dev, br_handle_frame, p);
	if (err)
484
		goto err5;
485

486
	dev->priv_flags |= IFF_BRIDGE_PORT;
487

488
	dev_disable_lro(dev);
489 490 491

	list_add_rcu(&p->list, &br->port_list);

492 493
	nbp_update_port_count(br);

494 495
	netdev_update_features(br->dev);

496 497 498
	if (br->dev->needed_headroom < dev->needed_headroom)
		br->dev->needed_headroom = dev->needed_headroom;

499 500 501
	if (br_fdb_insert(br, p, dev->dev_addr, 0))
		netdev_err(dev, "failed insert local address bridge forwarding table\n");

502
	spin_lock_bh(&br->lock);
503
	changed_addr = br_stp_recalculate_bridge_id(br);
504

505
	if (netif_running(dev) && netif_oper_up(dev) &&
506 507
	    (br->dev->flags & IFF_UP))
		br_stp_enable_port(p);
508 509
	spin_unlock_bh(&br->lock);

510 511
	br_ifinfo_notify(RTM_NEWLINK, p);

512
	if (changed_addr)
513
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
514

515
	dev_set_mtu(br->dev, br_min_mtu(br));
516

517
	kobject_uevent(&p->kobj, KOBJ_ADD);
L
Linus Torvalds 已提交
518

519
	return 0;
520

521
err5:
522
	netdev_upper_dev_unlink(dev, br->dev);
523 524
err4:
	br_netpoll_disable(p);
H
Herbert Xu 已提交
525 526
err3:
	sysfs_remove_link(br->ifobj, p->dev->name);
527
err2:
528
	kobject_put(&p->kobj);
529
	p = NULL; /* kobject_put frees */
530
err1:
531
	dev_set_promiscuity(dev, -1);
532 533
put_back:
	dev_put(dev);
534
	kfree(p);
L
Linus Torvalds 已提交
535 536 537 538 539 540
	return err;
}

/* called with RTNL */
int br_del_if(struct net_bridge *br, struct net_device *dev)
{
541
	struct net_bridge_port *p;
542
	bool changed_addr;
543

544
	p = br_port_get_rtnl(dev);
545
	if (!p || p->br != br)
L
Linus Torvalds 已提交
546 547
		return -EINVAL;

548 549 550 551
	/* Since more than one interface can be attached to a bridge,
	 * there still maybe an alternate path for netconsole to use;
	 * therefore there is no reason for a NETDEV_RELEASE event.
	 */
L
Linus Torvalds 已提交
552 553 554
	del_nbp(p);

	spin_lock_bh(&br->lock);
555
	changed_addr = br_stp_recalculate_bridge_id(br);
L
Linus Torvalds 已提交
556 557
	spin_unlock_bh(&br->lock);

558 559 560
	if (changed_addr)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);

561 562
	netdev_update_features(br->dev);

L
Linus Torvalds 已提交
563 564
	return 0;
}
565 566 567 568 569 570 571 572

/*
 * Called when flags of port @p changed; @mask holds the changed bits.
 * The bridge's auto-port count is refreshed only when a BR_AUTO_MASK
 * bit is among them.
 */
void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
{
	struct net_bridge *bridge = p->br;

	if (!(mask & BR_AUTO_MASK))
		return;

	nbp_update_port_count(bridge);
}