/*
 *	Userspace interface
 *	Linux ethernet bridge
 *
 *	Authors:
 *	Lennert Buytenhek		<buytenh@gnu.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/netpoll.h>
#include <linux/ethtool.h>
#include <linux/if_arp.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/rtnetlink.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/if_vlan.h>

#include "br_private.h"

/*
 * Determine initial path cost based on speed.
 * using recommendations from 802.1d standard
 *
34
 * Since driver might sleep need to not be holding any locks.
L
Linus Torvalds 已提交
35
 */
36
static int port_cost(struct net_device *dev)
L
Linus Torvalds 已提交
37
{
J
Jiri Pirko 已提交
38
	struct ethtool_cmd ecmd;
39

40
	if (!__ethtool_get_settings(dev, &ecmd)) {
J
Jiri Pirko 已提交
41 42 43 44 45 46 47 48 49
		switch (ethtool_cmd_speed(&ecmd)) {
		case SPEED_10000:
			return 2;
		case SPEED_1000:
			return 4;
		case SPEED_100:
			return 19;
		case SPEED_10:
			return 100;
L
Linus Torvalds 已提交
50 51 52 53 54 55 56 57 58 59 60 61 62
		}
	}

	/* Old silly heuristics based on name */
	if (!strncmp(dev->name, "lec", 3))
		return 7;

	if (!strncmp(dev->name, "plip", 4))
		return 2500;

	return 100;	/* assume old 10Mbps */
}

/* Check for port carrier transitions. */
65
void br_port_carrier_check(struct net_bridge_port *p)
66
{
67 68
	struct net_device *dev = p->dev;
	struct net_bridge *br = p->br;
S
Stephen Hemminger 已提交
69

70 71
	if (!(p->flags & BR_ADMIN_COST) &&
	    netif_running(dev) && netif_oper_up(dev))
S
Stephen Hemminger 已提交
72 73
		p->path_cost = port_cost(dev);

74 75 76 77
	if (!netif_running(br->dev))
		return;

	spin_lock_bh(&br->lock);
78
	if (netif_running(dev) && netif_oper_up(dev)) {
79 80 81 82 83
		if (p->state == BR_STATE_DISABLED)
			br_stp_enable_port(p);
	} else {
		if (p->state != BR_STATE_DISABLED)
			br_stp_disable_port(p);
84
	}
85
	spin_unlock_bh(&br->lock);
86 87
}

/* Put a port into promiscuous mode if it is not already.
 *
 * Nothing is changed when the port is already marked promiscuous or
 * when dev_set_promiscuity() reports an error.  On success the static
 * fdb entries are unsynced from the port and BR_PROMISC is recorded.
 */
static void br_port_set_promisc(struct net_bridge_port *p)
{
	if (!br_promisc_port(p) && !dev_set_promiscuity(p->dev, 1)) {
		br_fdb_unsync_static(p->br, p);
		p->flags |= BR_PROMISC;
	}
}

static void br_port_clear_promisc(struct net_bridge_port *p)
{
	int err;

	/* Check if the port is already non-promisc or if it doesn't
	 * support UNICAST filtering.  Without unicast filtering support
	 * we'll end up re-enabling promisc mode anyway, so just check for
	 * it here.
	 */
	if (!br_promisc_port(p) || !(p->dev->priv_flags & IFF_UNICAST_FLT))
		return;

	/* Since we'll be clearing the promisc mode, program the port
	 * first so that we don't have interruption in traffic.
	 */
	err = br_fdb_sync_static(p->br, p);
	if (err)
		return;

	dev_set_promiscuity(p->dev, -1);
	p->flags &= ~BR_PROMISC;
}

/* When a port is added or removed or when certain port flags
 * change, this function is called to automatically manage
 * promiscuity setting of all the bridge ports.  We are always called
 * under RTNL so can skip using rcu primitives.
 */
void br_manage_promisc(struct net_bridge *br)
{
	struct net_bridge_port *p;
	bool set_all = false;

	/* If vlan filtering is disabled or bridge interface is placed
	 * into promiscuous mode, place all ports in promiscuous mode.
	 */
	if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br))
		set_all = true;

	list_for_each_entry(p, &br->port_list, list) {
		if (set_all) {
			br_port_set_promisc(p);
		} else {
			/* If the number of auto-ports is <= 1, then all other
			 * ports will have their output configuration
			 * statically specified through fdbs.  Since ingress
			 * on the auto-port becomes forwarding/egress to other
			 * ports and egress configuration is statically known,
			 * we can say that ingress configuration of the
			 * auto-port is also statically known.
			 * This lets us disable promiscuous mode and write
			 * this config to hw.
			 */
			if (br->auto_cnt == 0 ||
			    (br->auto_cnt == 1 && br_auto_port(p)))
				br_port_clear_promisc(p);
			else
				br_port_set_promisc(p);
		}
	}
}

/* Recount the ports for which br_auto_port() is true.  When the count
 * differs from the cached br->auto_cnt, store the new value and let
 * br_manage_promisc() re-evaluate promiscuity for every port.
 */
static void nbp_update_port_count(struct net_bridge *br)
{
	struct net_bridge_port *p;
	u32 cnt = 0;

	list_for_each_entry(p, &br->port_list, list) {
		if (br_auto_port(p))
			cnt++;
	}
	if (br->auto_cnt != cnt) {
		br->auto_cnt = cnt;
		br_manage_promisc(br);
	}
}

/* Undo the receive-mode changes made to a port's device while it was
 * part of the bridge.
 */
static void nbp_delete_promisc(struct net_bridge_port *p)
{
	/* The allmulti count is dropped unconditionally.
	 * If port is currently promiscuous, unset promiscuity.
	 * Otherwise, it is a static port so remove all addresses
	 * from it.
	 */
	dev_set_allmulti(p->dev, -1);
	if (br_promisc_port(p))
		dev_set_promiscuity(p->dev, -1);
	else
		br_fdb_unsync_static(p->br, p);
}

/* kobject release callback: frees the net_bridge_port that embeds @kobj. */
static void release_nbp(struct kobject *kobj)
{
	kfree(container_of(kobj, struct net_bridge_port, kobj));
}

/* kobject type for bridge ports: release_nbp() frees the port; the
 * sysfs operations are only hooked up when CONFIG_SYSFS is enabled.
 */
static struct kobj_type brport_ktype = {
	.release = release_nbp,
#ifdef CONFIG_SYSFS
	.sysfs_ops = &brport_sysfs_ops,
#endif
};

/* Final teardown of a port: detach it from its bridge and device, drop
 * the device reference, and drop the port's kobject reference (the
 * kobject release callback, release_nbp(), is what frees the port).
 */
static void destroy_nbp(struct net_bridge_port *p)
{
	struct net_device *dev = p->dev;

	p->br = NULL;
	p->dev = NULL;
	dev_put(dev);

	kobject_put(&p->kobj);
}

/* RCU callback: recover the port from its embedded rcu_head and destroy it. */
static void destroy_nbp_rcu(struct rcu_head *head)
{
	destroy_nbp(container_of(head, struct net_bridge_port, rcu));
}

/* Delete port(interface) from bridge is done in two steps.
 * via RCU. First step, marks device as down. That deletes
 * all the timers and stops new packets from flowing through.
 *
 * Final cleanup doesn't occur until after all CPU's finished
 * processing packets.
 *
 * Protected from multiple admin operations by RTNL mutex
 */
L
Linus Torvalds 已提交
234 235 236 237 238
static void del_nbp(struct net_bridge_port *p)
{
	struct net_bridge *br = p->br;
	struct net_device *dev = p->dev;

239
	sysfs_remove_link(br->ifobj, p->dev->name);
240

241
	nbp_delete_promisc(p);
L
Linus Torvalds 已提交
242 243 244 245 246

	spin_lock_bh(&br->lock);
	br_stp_disable_port(p);
	spin_unlock_bh(&br->lock);

247 248
	br_ifinfo_notify(RTM_DELLINK, p);

L
Linus Torvalds 已提交
249 250
	list_del_rcu(&p->list);

251 252
	nbp_vlan_flush(p);
	br_fdb_delete_by_port(br, p, 1);
253 254
	nbp_update_port_count(br);

255 256
	netdev_upper_dev_unlink(dev, br->dev);

257 258
	dev->priv_flags &= ~IFF_BRIDGE_PORT;

259
	netdev_rx_handler_unregister(dev);
260

261 262
	br_multicast_del_port(p);

263
	kobject_uevent(&p->kobj, KOBJ_REMOVE);
264 265
	kobject_del(&p->kobj);

H
Herbert Xu 已提交
266 267
	br_netpoll_disable(p);

L
Linus Torvalds 已提交
268 269 270
	call_rcu(&p->rcu, destroy_nbp_rcu);
}

/* Delete bridge device */
void br_dev_delete(struct net_device *dev, struct list_head *head)
L
Linus Torvalds 已提交
273
{
274
	struct net_bridge *br = netdev_priv(dev);
L
Linus Torvalds 已提交
275 276 277 278 279 280
	struct net_bridge_port *p, *n;

	list_for_each_entry_safe(p, n, &br->port_list, list) {
		del_nbp(p);
	}

281 282
	br_fdb_delete_by_port(br, NULL, 1);

283
	br_vlan_flush(br);
L
Linus Torvalds 已提交
284 285 286
	del_timer_sync(&br->gc_timer);

	br_sysfs_delbr(br->dev);
287
	unregister_netdevice_queue(br->dev, head);
L
Linus Torvalds 已提交
288 289 290 291 292 293 294 295 296
}

/* find an available port number */
static int find_portno(struct net_bridge *br)
{
	int index;
	struct net_bridge_port *p;
	unsigned long *inuse;

S
Stephen Hemminger 已提交
297
	inuse = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
L
Linus Torvalds 已提交
298 299 300 301 302 303 304 305 306 307 308 309 310 311
			GFP_KERNEL);
	if (!inuse)
		return -ENOMEM;

	set_bit(0, inuse);	/* zero is reserved */
	list_for_each_entry(p, &br->port_list, list) {
		set_bit(p->port_no, inuse);
	}
	index = find_first_zero_bit(inuse, BR_MAX_PORTS);
	kfree(inuse);

	return (index >= BR_MAX_PORTS) ? -EXFULL : index;
}

/* called with RTNL but without bridge lock */
313
static struct net_bridge_port *new_nbp(struct net_bridge *br,
314
				       struct net_device *dev)
L
Linus Torvalds 已提交
315 316 317
{
	int index;
	struct net_bridge_port *p;
318

L
Linus Torvalds 已提交
319 320 321 322
	index = find_portno(br);
	if (index < 0)
		return ERR_PTR(index);

S
Stephen Hemminger 已提交
323
	p = kzalloc(sizeof(*p), GFP_KERNEL);
L
Linus Torvalds 已提交
324 325 326 327 328 329
	if (p == NULL)
		return ERR_PTR(-ENOMEM);

	p->br = br;
	dev_hold(dev);
	p->dev = dev;
330
	p->path_cost = port_cost(dev);
331
	p->priority = 0x8000 >> BR_PORT_BITS;
L
Linus Torvalds 已提交
332
	p->port_no = index;
333
	p->flags = BR_LEARNING | BR_FLOOD;
L
Linus Torvalds 已提交
334
	br_init_port(p);
335
	br_set_state(p, BR_STATE_DISABLED);
336
	br_stp_port_timer_init(p);
337
	br_multicast_add_port(p);
L
Linus Torvalds 已提交
338 339 340 341

	return p;
}

int br_add_bridge(struct net *net, const char *name)
L
Linus Torvalds 已提交
343 344
{
	struct net_device *dev;
345
	int res;
L
Linus Torvalds 已提交
346

347
	dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN,
348 349
			   br_dev_setup);

350
	if (!dev)
L
Linus Torvalds 已提交
351 352
		return -ENOMEM;

353
	dev_net_set(dev, net);
354
	dev->rtnl_link_ops = &br_link_ops;
355

356 357 358 359
	res = register_netdev(dev);
	if (res)
		free_netdev(dev);
	return res;
L
Linus Torvalds 已提交
360 361
}

int br_del_bridge(struct net *net, const char *name)
L
Linus Torvalds 已提交
363 364 365 366 367
{
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();
368
	dev = __dev_get_by_name(net, name);
369
	if (dev == NULL)
L
Linus Torvalds 已提交
370 371 372 373 374 375 376 377 378 379
		ret =  -ENXIO; 	/* Could not find device */

	else if (!(dev->priv_flags & IFF_EBRIDGE)) {
		/* Attempt to delete non bridge device! */
		ret = -EPERM;
	}

	else if (dev->flags & IFF_UP) {
		/* Not shutdown yet. */
		ret = -EBUSY;
380
	}
L
Linus Torvalds 已提交
381

382
	else
383
		br_dev_delete(dev, NULL);
L
Linus Torvalds 已提交
384 385 386 387 388

	rtnl_unlock();
	return ret;
}

/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
L
Linus Torvalds 已提交
390 391 392 393 394 395 396 397
int br_min_mtu(const struct net_bridge *br)
{
	const struct net_bridge_port *p;
	int mtu = 0;

	ASSERT_RTNL();

	if (list_empty(&br->port_list))
398
		mtu = ETH_DATA_LEN;
L
Linus Torvalds 已提交
399 400 401 402 403 404 405 406 407
	else {
		list_for_each_entry(p, &br->port_list, list) {
			if (!mtu  || p->dev->mtu < mtu)
				mtu = p->dev->mtu;
		}
	}
	return mtu;
}

/*
 * Recomputes features using slave's features
 */
411 412
netdev_features_t br_features_recompute(struct net_bridge *br,
	netdev_features_t features)
413 414
{
	struct net_bridge_port *p;
415
	netdev_features_t mask;
416

417
	if (list_empty(&br->port_list))
418
		return features;
419

420
	mask = features;
421
	features &= ~NETIF_F_ONE_FOR_ALL;
422 423

	list_for_each_entry(p, &br->port_list, list) {
424 425
		features = netdev_increment_features(features,
						     p->dev->features, mask);
426
	}
427
	features = netdev_add_tso_features(features, mask);
428

429
	return features;
430 431
}

/* called with RTNL */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *p;
	int err = 0;
437
	bool changed_addr;
L
Linus Torvalds 已提交
438

439 440 441 442 443 444
	/* Don't allow bridging non-ethernet like devices, or DSA-enabled
	 * master network devices since the bridge layer rx_handler prevents
	 * the DSA fake ethertype handler to be invoked, so we do not strip off
	 * the DSA switch tag protocol header and the bridge layer just return
	 * RX_HANDLER_CONSUMED, stopping RX processing for these frames.
	 */
445
	if ((dev->flags & IFF_LOOPBACK) ||
446
	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
447 448
	    !is_valid_ether_addr(dev->dev_addr) ||
	    netdev_uses_dsa(dev))
L
Linus Torvalds 已提交
449 450
		return -EINVAL;

451
	/* No bridging of bridges */
452
	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
L
Linus Torvalds 已提交
453 454
		return -ELOOP;

455
	/* Device is already being bridged */
456
	if (br_port_exists(dev))
L
Linus Torvalds 已提交
457 458
		return -EBUSY;

459 460 461 462
	/* No bridging devices that dislike that (e.g. wireless) */
	if (dev->priv_flags & IFF_DONT_BRIDGE)
		return -EOPNOTSUPP;

463 464
	p = new_nbp(br, dev);
	if (IS_ERR(p))
L
Linus Torvalds 已提交
465 466
		return PTR_ERR(p);

467 468
	call_netdevice_notifiers(NETDEV_JOIN, dev);

469
	err = dev_set_allmulti(dev, 1);
470 471 472
	if (err)
		goto put_back;

473 474
	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
				   SYSFS_BRIDGE_PORT_ATTR);
475 476
	if (err)
		goto err1;
L
Linus Torvalds 已提交
477

478 479 480
	err = br_sysfs_addif(p);
	if (err)
		goto err2;
L
Linus Torvalds 已提交
481

482
	err = br_netpoll_enable(p);
S
stephen hemminger 已提交
483
	if (err)
H
Herbert Xu 已提交
484 485
		goto err3;

486
	err = netdev_rx_handler_register(dev, br_handle_frame, p);
487
	if (err)
488
		goto err4;
489

490 491 492
	dev->priv_flags |= IFF_BRIDGE_PORT;

	err = netdev_master_upper_dev_link(dev, br->dev);
493
	if (err)
494
		goto err5;
495

496
	dev_disable_lro(dev);
497 498 499

	list_add_rcu(&p->list, &br->port_list);

500 501
	nbp_update_port_count(br);

502 503
	netdev_update_features(br->dev);

504 505 506
	if (br->dev->needed_headroom < dev->needed_headroom)
		br->dev->needed_headroom = dev->needed_headroom;

507 508 509
	if (br_fdb_insert(br, p, dev->dev_addr, 0))
		netdev_err(dev, "failed insert local address bridge forwarding table\n");

510 511 512
	if (nbp_vlan_init(p))
		netdev_err(dev, "failed to initialize vlan filtering on this port\n");

513
	spin_lock_bh(&br->lock);
514
	changed_addr = br_stp_recalculate_bridge_id(br);
515

516
	if (netif_running(dev) && netif_oper_up(dev) &&
517 518
	    (br->dev->flags & IFF_UP))
		br_stp_enable_port(p);
519 520
	spin_unlock_bh(&br->lock);

521 522
	br_ifinfo_notify(RTM_NEWLINK, p);

523
	if (changed_addr)
524
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
525

526
	dev_set_mtu(br->dev, br_min_mtu(br));
527

528
	kobject_uevent(&p->kobj, KOBJ_ADD);
L
Linus Torvalds 已提交
529

530
	return 0;
531

532
err5:
533 534
	dev->priv_flags &= ~IFF_BRIDGE_PORT;
	netdev_rx_handler_unregister(dev);
535 536
err4:
	br_netpoll_disable(p);
H
Herbert Xu 已提交
537 538
err3:
	sysfs_remove_link(br->ifobj, p->dev->name);
539
err2:
540
	kobject_put(&p->kobj);
541
	p = NULL; /* kobject_put frees */
542
err1:
543
	dev_set_allmulti(dev, -1);
544 545
put_back:
	dev_put(dev);
546
	kfree(p);
L
Linus Torvalds 已提交
547 548 549 550 551 552
	return err;
}

/* called with RTNL */
int br_del_if(struct net_bridge *br, struct net_device *dev)
{
553
	struct net_bridge_port *p;
554
	bool changed_addr;
555

556
	p = br_port_get_rtnl(dev);
557
	if (!p || p->br != br)
L
Linus Torvalds 已提交
558 559
		return -EINVAL;

560 561 562 563
	/* Since more than one interface can be attached to a bridge,
	 * there still maybe an alternate path for netconsole to use;
	 * therefore there is no reason for a NETDEV_RELEASE event.
	 */
L
Linus Torvalds 已提交
564 565 566
	del_nbp(p);

	spin_lock_bh(&br->lock);
567
	changed_addr = br_stp_recalculate_bridge_id(br);
L
Linus Torvalds 已提交
568 569
	spin_unlock_bh(&br->lock);

570 571 572
	if (changed_addr)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);

573 574
	netdev_update_features(br->dev);

L
Linus Torvalds 已提交
575 576
	return 0;
}

/* Called after flags on port @p changed; @mask holds the changed bits.
 * Only changes touching BR_AUTO_MASK require the bridge's auto-port
 * count to be refreshed.
 */
void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
{
	if (!(mask & BR_AUTO_MASK))
		return;

	nbp_update_port_count(p->br);
}