br_if.c 14.5 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 *	Userspace interface
 *	Linux ethernet bridge
 *
 *	Authors:
 *	Lennert Buytenhek		<buytenh@gnu.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/netdevice.h>
16
#include <linux/etherdevice.h>
W
WANG Cong 已提交
17
#include <linux/netpoll.h>
L
Linus Torvalds 已提交
18 19 20 21 22
#include <linux/ethtool.h>
#include <linux/if_arp.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/rtnetlink.h>
23
#include <linux/if_ether.h>
24
#include <linux/slab.h>
25
#include <net/dsa.h>
L
Linus Torvalds 已提交
26
#include <net/sock.h>
27
#include <linux/if_vlan.h>
28
#include <net/switchdev.h>
L
Linus Torvalds 已提交
29 30 31 32 33 34 35

#include "br_private.h"

/*
 * Determine initial path cost based on speed.
 * using recommendations from 802.1d standard
 *
36
 * Since driver might sleep need to not be holding any locks.
L
Linus Torvalds 已提交
37
 */
38
static int port_cost(struct net_device *dev)
L
Linus Torvalds 已提交
39
{
40
	struct ethtool_link_ksettings ecmd;
41

42 43
	if (!__ethtool_get_link_ksettings(dev, &ecmd)) {
		switch (ecmd.base.speed) {
J
Jiri Pirko 已提交
44 45 46 47 48 49 50 51
		case SPEED_10000:
			return 2;
		case SPEED_1000:
			return 4;
		case SPEED_100:
			return 19;
		case SPEED_10:
			return 100;
L
Linus Torvalds 已提交
52 53 54 55 56 57 58 59 60 61 62 63 64
		}
	}

	/* Old silly heuristics based on name */
	if (!strncmp(dev->name, "lec", 3))
		return 7;

	if (!strncmp(dev->name, "plip", 4))
		return 2500;

	return 100;	/* assume old 10Mbps */
}

65

T
tanxiaojun 已提交
66
/* Check for port carrier transitions. */
67
void br_port_carrier_check(struct net_bridge_port *p)
68
{
69 70
	struct net_device *dev = p->dev;
	struct net_bridge *br = p->br;
S
Stephen Hemminger 已提交
71

72 73
	if (!(p->flags & BR_ADMIN_COST) &&
	    netif_running(dev) && netif_oper_up(dev))
S
Stephen Hemminger 已提交
74 75
		p->path_cost = port_cost(dev);

76 77 78 79
	if (!netif_running(br->dev))
		return;

	spin_lock_bh(&br->lock);
80
	if (netif_running(dev) && netif_oper_up(dev)) {
81 82 83 84 85
		if (p->state == BR_STATE_DISABLED)
			br_stp_enable_port(p);
	} else {
		if (p->state != BR_STATE_DISABLED)
			br_stp_disable_port(p);
86
	}
87
	spin_unlock_bh(&br->lock);
88 89
}

90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
/* Switch a port to promiscuous mode.
 *
 * Nothing happens if the port is already marked promiscuous or if the
 * device refuses the promiscuity request.  On success the static FDB
 * entries are unsynced from the port and BR_PROMISC is recorded in the
 * port flags.
 */
static void br_port_set_promisc(struct net_bridge_port *p)
{
	if (!br_promisc_port(p) && !dev_set_promiscuity(p->dev, 1)) {
		br_fdb_unsync_static(p->br, p);
		p->flags |= BR_PROMISC;
	}
}

/* Take a port out of promiscuous mode, if that is possible. */
static void br_port_clear_promisc(struct net_bridge_port *p)
{
	/* Already non-promiscuous: nothing to undo. */
	if (!br_promisc_port(p))
		return;

	/* Without unicast filtering support we would end up re-enabling
	 * promiscuous mode anyway, so leave the port as it is.
	 */
	if (!(p->dev->priv_flags & IFF_UNICAST_FLT))
		return;

	/* Program the static addresses into the port before promiscuous
	 * mode is dropped, so that traffic is not interrupted.  If that
	 * fails the port stays promiscuous.
	 */
	if (br_fdb_sync_static(p->br, p))
		return;

	dev_set_promiscuity(p->dev, -1);
	p->flags &= ~BR_PROMISC;
}

/* When a port is added or removed or when certain port flags
 * change, this function is called to automatically manage
 * promiscuity setting of all the bridge ports.  We are always called
 * under RTNL so can skip using rcu primitives.
 */
void br_manage_promisc(struct net_bridge *br)
{
	struct net_bridge_port *p;
	bool set_all = false;

	/* If vlan filtering is disabled or bridge interface is placed
	 * into promiscuous mode, place all ports in promiscuous mode.
	 */
141
	if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br->dev))
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
		set_all = true;

	list_for_each_entry(p, &br->port_list, list) {
		if (set_all) {
			br_port_set_promisc(p);
		} else {
			/* If the number of auto-ports is <= 1, then all other
			 * ports will have their output configuration
			 * statically specified through fdbs.  Since ingress
			 * on the auto-port becomes forwarding/egress to other
			 * ports and egress configuration is statically known,
			 * we can say that ingress configuration of the
			 * auto-port is also statically known.
			 * This lets us disable promiscuous mode and write
			 * this config to hw.
			 */
158 159
			if (br->auto_cnt == 0 ||
			    (br->auto_cnt == 1 && br_auto_port(p)))
160 161 162 163 164 165 166
				br_port_clear_promisc(p);
			else
				br_port_set_promisc(p);
		}
	}
}

167 168 169 170 171 172 173 174 175
/* Recount the ports for which br_auto_port() is true.  When the count
 * differs from the cached br->auto_cnt, store the new value and re-run
 * br_manage_promisc() so that per-port promiscuity follows the change.
 */
static void nbp_update_port_count(struct net_bridge *br)
{
	struct net_bridge_port *p;
	u32 cnt = 0;

	list_for_each_entry(p, &br->port_list, list) {
		if (br_auto_port(p))
			cnt++;
	}
	if (br->auto_cnt != cnt) {
		br->auto_cnt = cnt;
		br_manage_promisc(br);
	}
}

/* Undo the receive-mode changes the bridge applied to a port device that
 * is being removed.
 */
static void nbp_delete_promisc(struct net_bridge_port *p)
{
	/* If port is currently promiscuous, unset promiscuity.
	 * Otherwise, it is a static port so remove all addresses
	 * from it.
	 */
	/* The allmulti reference is dropped unconditionally. */
	dev_set_allmulti(p->dev, -1);
	if (br_promisc_port(p))
		dev_set_promiscuity(p->dev, -1);
	else
		br_fdb_unsync_static(p->br, p);
}

195 196 197 198 199 200 201 202 203 204 205 206 207 208
/* kobject release callback: free the bridge port that embeds @kobj. */
static void release_nbp(struct kobject *kobj)
{
	kfree(container_of(kobj, struct net_bridge_port, kobj));
}

/* kobject type for bridge ports: release_nbp() frees the port; the sysfs
 * operations are only wired up when sysfs is configured.
 */
static struct kobj_type brport_ktype = {
	.release = release_nbp,
#ifdef CONFIG_SYSFS
	.sysfs_ops = &brport_sysfs_ops,
#endif
};

L
Linus Torvalds 已提交
209 210 211 212 213 214 215 216
/* Final teardown of a port: clear its bridge and device back-pointers,
 * drop the device reference and drop the port's kobject reference.
 */
static void destroy_nbp(struct net_bridge_port *p)
{
	struct net_device *dev = p->dev;

	p->br = NULL;
	p->dev = NULL;
	dev_put(dev);

	kobject_put(&p->kobj);
}

/* RCU callback: map the rcu_head back to its port and destroy the port. */
static void destroy_nbp_rcu(struct rcu_head *head)
{
	destroy_nbp(container_of(head, struct net_bridge_port, rcu));
}

227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
/* Return the largest forwarding headroom reported by any port device of
 * @br, or 0 when the bridge has no ports.
 */
static unsigned get_max_headroom(struct net_bridge *br)
{
	struct net_bridge_port *port;
	unsigned widest = 0;

	list_for_each_entry(port, &br->port_list, list) {
		unsigned hr = netdev_get_fwd_headroom(port->dev);

		widest = (hr > widest) ? hr : widest;
	}

	return widest;
}

static void update_headroom(struct net_bridge *br, int new_hr)
{
	struct net_bridge_port *p;

	list_for_each_entry(p, &br->port_list, list)
		netdev_set_rx_headroom(p->dev, new_hr);

	br->dev->needed_headroom = new_hr;
}

252 253 254 255 256 257 258 259 260
/* Delete port(interface) from bridge is done in two steps.
 * via RCU. First step, marks device as down. That deletes
 * all the timers and stops new packets from flowing through.
 *
 * Final cleanup doesn't occur until after all CPU's finished
 * processing packets.
 *
 * Protected from multiple admin operations by RTNL mutex
 */
L
Linus Torvalds 已提交
261 262 263 264 265
static void del_nbp(struct net_bridge_port *p)
{
	struct net_bridge *br = p->br;
	struct net_device *dev = p->dev;

266
	sysfs_remove_link(br->ifobj, p->dev->name);
267

268
	nbp_delete_promisc(p);
L
Linus Torvalds 已提交
269 270 271 272 273

	spin_lock_bh(&br->lock);
	br_stp_disable_port(p);
	spin_unlock_bh(&br->lock);

274 275
	br_ifinfo_notify(RTM_DELLINK, p);

L
Linus Torvalds 已提交
276
	list_del_rcu(&p->list);
277 278 279
	if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
		update_headroom(br, get_max_headroom(br));
	netdev_reset_rx_headroom(dev);
L
Linus Torvalds 已提交
280

281
	nbp_vlan_flush(p);
282
	br_fdb_delete_by_port(br, p, 0, 1);
283 284
	switchdev_deferred_process();

285 286
	nbp_update_port_count(br);

287 288
	netdev_upper_dev_unlink(dev, br->dev);

289 290
	dev->priv_flags &= ~IFF_BRIDGE_PORT;

291
	netdev_rx_handler_unregister(dev);
292

293 294
	br_multicast_del_port(p);

295
	kobject_uevent(&p->kobj, KOBJ_REMOVE);
296 297
	kobject_del(&p->kobj);

H
Herbert Xu 已提交
298 299
	br_netpoll_disable(p);

L
Linus Torvalds 已提交
300 301 302
	call_rcu(&p->rcu, destroy_nbp_rcu);
}

303 304
/* Delete bridge device */
void br_dev_delete(struct net_device *dev, struct list_head *head)
L
Linus Torvalds 已提交
305
{
306
	struct net_bridge *br = netdev_priv(dev);
L
Linus Torvalds 已提交
307 308 309 310 311 312
	struct net_bridge_port *p, *n;

	list_for_each_entry_safe(p, n, &br->port_list, list) {
		del_nbp(p);
	}

313
	br_fdb_delete_by_port(br, NULL, 0, 1);
314

315
	cancel_delayed_work_sync(&br->gc_work);
L
Linus Torvalds 已提交
316 317

	br_sysfs_delbr(br->dev);
318
	unregister_netdevice_queue(br->dev, head);
L
Linus Torvalds 已提交
319 320 321 322 323 324 325 326 327
}

/* find an available port number */
static int find_portno(struct net_bridge *br)
{
	int index;
	struct net_bridge_port *p;
	unsigned long *inuse;

S
Stephen Hemminger 已提交
328
	inuse = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
L
Linus Torvalds 已提交
329 330 331 332 333 334 335 336 337 338 339 340 341 342
			GFP_KERNEL);
	if (!inuse)
		return -ENOMEM;

	set_bit(0, inuse);	/* zero is reserved */
	list_for_each_entry(p, &br->port_list, list) {
		set_bit(p->port_no, inuse);
	}
	index = find_first_zero_bit(inuse, BR_MAX_PORTS);
	kfree(inuse);

	return (index >= BR_MAX_PORTS) ? -EXFULL : index;
}

343
/* called with RTNL but without bridge lock */
344
static struct net_bridge_port *new_nbp(struct net_bridge *br,
345
				       struct net_device *dev)
L
Linus Torvalds 已提交
346 347
{
	struct net_bridge_port *p;
348
	int index, err;
349

L
Linus Torvalds 已提交
350 351 352 353
	index = find_portno(br);
	if (index < 0)
		return ERR_PTR(index);

S
Stephen Hemminger 已提交
354
	p = kzalloc(sizeof(*p), GFP_KERNEL);
L
Linus Torvalds 已提交
355 356 357 358 359 360
	if (p == NULL)
		return ERR_PTR(-ENOMEM);

	p->br = br;
	dev_hold(dev);
	p->dev = dev;
361
	p->path_cost = port_cost(dev);
362
	p->priority = 0x8000 >> BR_PORT_BITS;
L
Linus Torvalds 已提交
363
	p->port_no = index;
364
	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
L
Linus Torvalds 已提交
365
	br_init_port(p);
366
	br_set_state(p, BR_STATE_DISABLED);
367
	br_stp_port_timer_init(p);
368 369 370 371 372 373
	err = br_multicast_add_port(p);
	if (err) {
		dev_put(dev);
		kfree(p);
		p = ERR_PTR(err);
	}
L
Linus Torvalds 已提交
374 375 376 377

	return p;
}

378
int br_add_bridge(struct net *net, const char *name)
L
Linus Torvalds 已提交
379 380
{
	struct net_device *dev;
381
	int res;
L
Linus Torvalds 已提交
382

383
	dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN,
384 385
			   br_dev_setup);

386
	if (!dev)
L
Linus Torvalds 已提交
387 388
		return -ENOMEM;

389
	dev_net_set(dev, net);
390
	dev->rtnl_link_ops = &br_link_ops;
391

392 393 394 395
	res = register_netdev(dev);
	if (res)
		free_netdev(dev);
	return res;
L
Linus Torvalds 已提交
396 397
}

398
int br_del_bridge(struct net *net, const char *name)
L
Linus Torvalds 已提交
399 400 401 402 403
{
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();
404
	dev = __dev_get_by_name(net, name);
405
	if (dev == NULL)
L
Linus Torvalds 已提交
406 407 408 409 410 411 412 413 414 415
		ret =  -ENXIO; 	/* Could not find device */

	else if (!(dev->priv_flags & IFF_EBRIDGE)) {
		/* Attempt to delete non bridge device! */
		ret = -EPERM;
	}

	else if (dev->flags & IFF_UP) {
		/* Not shutdown yet. */
		ret = -EBUSY;
416
	}
L
Linus Torvalds 已提交
417

418
	else
419
		br_dev_delete(dev, NULL);
L
Linus Torvalds 已提交
420 421 422 423 424

	rtnl_unlock();
	return ret;
}

425
/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
L
Linus Torvalds 已提交
426 427 428 429 430 431 432 433
int br_min_mtu(const struct net_bridge *br)
{
	const struct net_bridge_port *p;
	int mtu = 0;

	ASSERT_RTNL();

	if (list_empty(&br->port_list))
434
		mtu = ETH_DATA_LEN;
L
Linus Torvalds 已提交
435 436 437 438 439 440 441 442 443
	else {
		list_for_each_entry(p, &br->port_list, list) {
			if (!mtu  || p->dev->mtu < mtu)
				mtu = p->dev->mtu;
		}
	}
	return mtu;
}

444 445 446 447 448 449 450 451 452 453 454 455 456 457
/* Set the bridge device's GSO limits to the smallest gso_max_size and
 * gso_max_segs found among its ports, starting from GSO_MAX_SIZE and
 * GSO_MAX_SEGS.
 */
static void br_set_gso_limits(struct net_bridge *br)
{
	const struct net_bridge_port *port;
	unsigned int size_limit = GSO_MAX_SIZE;
	u16 segs_limit = GSO_MAX_SEGS;

	list_for_each_entry(port, &br->port_list, list) {
		if (port->dev->gso_max_size < size_limit)
			size_limit = port->dev->gso_max_size;
		if (port->dev->gso_max_segs < segs_limit)
			segs_limit = port->dev->gso_max_segs;
	}

	br->dev->gso_max_size = size_limit;
	br->dev->gso_max_segs = segs_limit;
}

458 459 460
/*
 * Recomputes features using slave's features
 */
461 462
netdev_features_t br_features_recompute(struct net_bridge *br,
	netdev_features_t features)
463 464
{
	struct net_bridge_port *p;
465
	netdev_features_t mask;
466

467
	if (list_empty(&br->port_list))
468
		return features;
469

470
	mask = features;
471
	features &= ~NETIF_F_ONE_FOR_ALL;
472 473

	list_for_each_entry(p, &br->port_list, list) {
474 475
		features = netdev_increment_features(features,
						     p->dev->features, mask);
476
	}
477
	features = netdev_add_tso_features(features, mask);
478

479
	return features;
480 481
}

L
Linus Torvalds 已提交
482 483 484 485 486
/* called with RTNL */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *p;
	int err = 0;
487
	unsigned br_hr, dev_hr;
488
	bool changed_addr;
L
Linus Torvalds 已提交
489

490 491 492 493 494 495
	/* Don't allow bridging non-ethernet like devices, or DSA-enabled
	 * master network devices since the bridge layer rx_handler prevents
	 * the DSA fake ethertype handler to be invoked, so we do not strip off
	 * the DSA switch tag protocol header and the bridge layer just return
	 * RX_HANDLER_CONSUMED, stopping RX processing for these frames.
	 */
496
	if ((dev->flags & IFF_LOOPBACK) ||
497
	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
498 499
	    !is_valid_ether_addr(dev->dev_addr) ||
	    netdev_uses_dsa(dev))
L
Linus Torvalds 已提交
500 501
		return -EINVAL;

502
	/* No bridging of bridges */
503
	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
L
Linus Torvalds 已提交
504 505
		return -ELOOP;

506
	/* Device is already being bridged */
507
	if (br_port_exists(dev))
L
Linus Torvalds 已提交
508 509
		return -EBUSY;

510 511 512 513
	/* No bridging devices that dislike that (e.g. wireless) */
	if (dev->priv_flags & IFF_DONT_BRIDGE)
		return -EOPNOTSUPP;

514 515
	p = new_nbp(br, dev);
	if (IS_ERR(p))
L
Linus Torvalds 已提交
516 517
		return PTR_ERR(p);

518 519
	call_netdevice_notifiers(NETDEV_JOIN, dev);

520
	err = dev_set_allmulti(dev, 1);
521 522 523
	if (err)
		goto put_back;

524 525
	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
				   SYSFS_BRIDGE_PORT_ATTR);
526 527
	if (err)
		goto err1;
L
Linus Torvalds 已提交
528

529 530 531
	err = br_sysfs_addif(p);
	if (err)
		goto err2;
L
Linus Torvalds 已提交
532

533
	err = br_netpoll_enable(p);
S
stephen hemminger 已提交
534
	if (err)
H
Herbert Xu 已提交
535 536
		goto err3;

537
	err = netdev_rx_handler_register(dev, br_handle_frame, p);
538
	if (err)
539
		goto err4;
540

541 542
	dev->priv_flags |= IFF_BRIDGE_PORT;

543
	err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL);
544
	if (err)
545
		goto err5;
546

547 548 549 550
	err = nbp_switchdev_mark_set(p);
	if (err)
		goto err6;

551
	dev_disable_lro(dev);
552 553 554

	list_add_rcu(&p->list, &br->port_list);

555 556
	nbp_update_port_count(br);

557 558
	netdev_update_features(br->dev);

559 560 561 562 563 564
	br_hr = br->dev->needed_headroom;
	dev_hr = netdev_get_fwd_headroom(dev);
	if (br_hr < dev_hr)
		update_headroom(br, dev_hr);
	else
		netdev_set_rx_headroom(dev, br_hr);
565

566 567 568
	if (br_fdb_insert(br, p, dev->dev_addr, 0))
		netdev_err(dev, "failed insert local address bridge forwarding table\n");

569 570
	err = nbp_vlan_init(p);
	if (err) {
571
		netdev_err(dev, "failed to initialize vlan filtering on this port\n");
572
		goto err7;
573
	}
574

575
	spin_lock_bh(&br->lock);
576
	changed_addr = br_stp_recalculate_bridge_id(br);
577

578
	if (netif_running(dev) && netif_oper_up(dev) &&
579 580
	    (br->dev->flags & IFF_UP))
		br_stp_enable_port(p);
581 582
	spin_unlock_bh(&br->lock);

583 584
	br_ifinfo_notify(RTM_NEWLINK, p);

585
	if (changed_addr)
586
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
587

588
	dev_set_mtu(br->dev, br_min_mtu(br));
589
	br_set_gso_limits(br);
590

591
	kobject_uevent(&p->kobj, KOBJ_ADD);
L
Linus Torvalds 已提交
592

593
	return 0;
594

595
err7:
596 597 598
	list_del_rcu(&p->list);
	br_fdb_delete_by_port(br, p, 0, 1);
	nbp_update_port_count(br);
599
err6:
600
	netdev_upper_dev_unlink(dev, br->dev);
601
err5:
602 603
	dev->priv_flags &= ~IFF_BRIDGE_PORT;
	netdev_rx_handler_unregister(dev);
604 605
err4:
	br_netpoll_disable(p);
H
Herbert Xu 已提交
606 607
err3:
	sysfs_remove_link(br->ifobj, p->dev->name);
608
err2:
609
	kobject_put(&p->kobj);
610
	p = NULL; /* kobject_put frees */
611
err1:
612
	dev_set_allmulti(dev, -1);
613 614
put_back:
	dev_put(dev);
615
	kfree(p);
L
Linus Torvalds 已提交
616 617 618 619 620 621
	return err;
}

/* called with RTNL */
int br_del_if(struct net_bridge *br, struct net_device *dev)
{
622
	struct net_bridge_port *p;
623
	bool changed_addr;
624

625
	p = br_port_get_rtnl(dev);
626
	if (!p || p->br != br)
L
Linus Torvalds 已提交
627 628
		return -EINVAL;

629 630 631 632
	/* Since more than one interface can be attached to a bridge,
	 * there still maybe an alternate path for netconsole to use;
	 * therefore there is no reason for a NETDEV_RELEASE event.
	 */
L
Linus Torvalds 已提交
633 634
	del_nbp(p);

635
	dev_set_mtu(br->dev, br_min_mtu(br));
636
	br_set_gso_limits(br);
637

L
Linus Torvalds 已提交
638
	spin_lock_bh(&br->lock);
639
	changed_addr = br_stp_recalculate_bridge_id(br);
L
Linus Torvalds 已提交
640 641
	spin_unlock_bh(&br->lock);

642 643 644
	if (changed_addr)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);

645 646
	netdev_update_features(br->dev);

L
Linus Torvalds 已提交
647 648
	return 0;
}
649 650 651 652 653 654 655 656

/* React to a change of port flags: when any bit covered by BR_AUTO_MASK is
 * in @mask, recount the bridge's auto ports.
 */
void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
{
	if (!(mask & BR_AUTO_MASK))
		return;

	nbp_update_port_count(p->br);
}