/*
 *	Userspace interface
 *	Linux ethernet bridge
 *
 *	Authors:
 *	Lennert Buytenhek		<buytenh@gnu.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/netpoll.h>
#include <linux/ethtool.h>
#include <linux/if_arp.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/rtnetlink.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/if_vlan.h>

#include "br_private.h"

/*
 * Determine initial path cost based on speed.
 * using recommendations from 802.1d standard
 *
34
 * Since driver might sleep need to not be holding any locks.
L
Linus Torvalds 已提交
35
 */
36
static int port_cost(struct net_device *dev)
L
Linus Torvalds 已提交
37
{
J
Jiri Pirko 已提交
38
	struct ethtool_cmd ecmd;
39

40
	if (!__ethtool_get_settings(dev, &ecmd)) {
J
Jiri Pirko 已提交
41 42 43 44 45 46 47 48 49
		switch (ethtool_cmd_speed(&ecmd)) {
		case SPEED_10000:
			return 2;
		case SPEED_1000:
			return 4;
		case SPEED_100:
			return 19;
		case SPEED_10:
			return 100;
L
Linus Torvalds 已提交
50 51 52 53 54 55 56 57 58 59 60 61 62
		}
	}

	/* Old silly heuristics based on name */
	if (!strncmp(dev->name, "lec", 3))
		return 7;

	if (!strncmp(dev->name, "plip", 4))
		return 2500;

	return 100;	/* assume old 10Mbps */
}

63

T
tanxiaojun 已提交
64
/* Check for port carrier transitions. */
65
void br_port_carrier_check(struct net_bridge_port *p)
66
{
67 68
	struct net_device *dev = p->dev;
	struct net_bridge *br = p->br;
S
Stephen Hemminger 已提交
69

70 71
	if (!(p->flags & BR_ADMIN_COST) &&
	    netif_running(dev) && netif_oper_up(dev))
S
Stephen Hemminger 已提交
72 73
		p->path_cost = port_cost(dev);

74 75 76 77
	if (!netif_running(br->dev))
		return;

	spin_lock_bh(&br->lock);
78
	if (netif_running(dev) && netif_oper_up(dev)) {
79 80 81 82 83
		if (p->state == BR_STATE_DISABLED)
			br_stp_enable_port(p);
	} else {
		if (p->state != BR_STATE_DISABLED)
			br_stp_disable_port(p);
84
	}
85
	spin_unlock_bh(&br->lock);
86 87
}

88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
static void br_port_set_promisc(struct net_bridge_port *p)
{
	int err = 0;

	if (br_promisc_port(p))
		return;

	err = dev_set_promiscuity(p->dev, 1);
	if (err)
		return;

	br_fdb_unsync_static(p->br, p);
	p->flags |= BR_PROMISC;
}

/*
 * Take a port out of promiscuous mode, provided its static fdb
 * addresses can be programmed into the device first.
 */
static void br_port_clear_promisc(struct net_bridge_port *p)
{
	/* Nothing to do for a port that is already non-promisc. */
	if (!br_promisc_port(p))
		return;

	/* Without unicast filtering support we would end up re-enabling
	 * promisc mode anyway, so leave such a device alone.
	 */
	if (!(p->dev->priv_flags & IFF_UNICAST_FLT))
		return;

	/* Program the port before promisc mode is cleared so that there
	 * is no interruption in traffic; give up if that fails.
	 */
	if (br_fdb_sync_static(p->br, p) != 0)
		return;

	dev_set_promiscuity(p->dev, -1);
	p->flags &= ~BR_PROMISC;
}

/* Automatically manage the promiscuity setting of every bridge port.
 * Called when a port is added or removed, or when certain port flags
 * change.  We always run under RTNL, so the port list is walked
 * without rcu primitives.
 */
void br_manage_promisc(struct net_bridge *br)
{
	struct net_bridge_port *port;
	bool force_promisc;

	/* With vlan filtering disabled, or with the bridge interface
	 * itself promiscuous, every port has to be promiscuous.
	 */
	force_promisc = (br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br);

	list_for_each_entry(port, &br->port_list, list) {
		if (force_promisc) {
			br_port_set_promisc(port);
			continue;
		}

		/* If the number of auto-ports is <= 1, all other ports
		 * have their output configuration statically specified
		 * through fdbs.  Ingress on the auto-port becomes
		 * forwarding/egress to other ports, whose egress
		 * configuration is statically known, so the ingress
		 * configuration of the auto-port is statically known
		 * too.  That lets us disable promiscuous mode and write
		 * this config to hw.
		 */
		if (br->auto_cnt == 0 ||
		    (br->auto_cnt == 1 && br_auto_port(port)))
			br_port_clear_promisc(port);
		else
			br_port_set_promisc(port);
	}
}

165 166 167 168 169 170 171 172 173
static void nbp_update_port_count(struct net_bridge *br)
{
	struct net_bridge_port *p;
	u32 cnt = 0;

	list_for_each_entry(p, &br->port_list, list) {
		if (br_auto_port(p))
			cnt++;
	}
174 175 176 177 178 179 180 181
	if (br->auto_cnt != cnt) {
		br->auto_cnt = cnt;
		br_manage_promisc(br);
	}
}

/*
 * Undo the receive-mode changes made on a port device: the allmulti
 * count is always decremented, then either promiscuity is dropped or
 * the statically synced addresses are removed.
 */
static void nbp_delete_promisc(struct net_bridge_port *p)
{
	dev_set_allmulti(p->dev, -1);

	if (!br_promisc_port(p)) {
		/* A static port: remove all addresses from it. */
		br_fdb_unsync_static(p->br, p);
		return;
	}

	/* Port is currently promiscuous, so unset promiscuity. */
	dev_set_promiscuity(p->dev, -1);
}

193 194 195 196 197 198 199 200 201 202 203 204 205 206
static void release_nbp(struct kobject *kobj)
{
	struct net_bridge_port *p
		= container_of(kobj, struct net_bridge_port, kobj);
	kfree(p);
}

/*
 * kobject type of a bridge port.  release_nbp() frees the port
 * structure; the sysfs operations are only wired up when CONFIG_SYSFS
 * is enabled.
 */
static struct kobj_type brport_ktype = {
#ifdef CONFIG_SYSFS
	.sysfs_ops = &brport_sysfs_ops,
#endif
	.release = release_nbp,
};

L
Linus Torvalds 已提交
207 208 209 210 211 212 213 214
static void destroy_nbp(struct net_bridge_port *p)
{
	struct net_device *dev = p->dev;

	p->br = NULL;
	p->dev = NULL;
	dev_put(dev);

215
	kobject_put(&p->kobj);
L
Linus Torvalds 已提交
216 217 218 219 220 221 222 223 224
}

/* RCU callback: recover the port from its rcu head and destroy it. */
static void destroy_nbp_rcu(struct rcu_head *head)
{
	destroy_nbp(container_of(head, struct net_bridge_port, rcu));
}

225 226 227 228 229 230 231 232 233
/* Delete port(interface) from bridge is done in two steps.
 * via RCU. First step, marks device as down. That deletes
 * all the timers and stops new packets from flowing through.
 *
 * Final cleanup doesn't occur until after all CPU's finished
 * processing packets.
 *
 * Protected from multiple admin operations by RTNL mutex
 */
L
Linus Torvalds 已提交
234 235 236 237 238
static void del_nbp(struct net_bridge_port *p)
{
	struct net_bridge *br = p->br;
	struct net_device *dev = p->dev;

239
	sysfs_remove_link(br->ifobj, p->dev->name);
240

241
	nbp_delete_promisc(p);
L
Linus Torvalds 已提交
242 243 244 245 246

	spin_lock_bh(&br->lock);
	br_stp_disable_port(p);
	spin_unlock_bh(&br->lock);

247 248
	br_ifinfo_notify(RTM_DELLINK, p);

L
Linus Torvalds 已提交
249 250
	list_del_rcu(&p->list);

251
	nbp_vlan_flush(p);
252
	br_fdb_delete_by_port(br, p, 0, 1);
253 254
	nbp_update_port_count(br);

255 256
	netdev_upper_dev_unlink(dev, br->dev);

257 258
	dev->priv_flags &= ~IFF_BRIDGE_PORT;

259
	netdev_rx_handler_unregister(dev);
260

261 262
	br_multicast_del_port(p);

263
	kobject_uevent(&p->kobj, KOBJ_REMOVE);
264 265
	kobject_del(&p->kobj);

H
Herbert Xu 已提交
266 267
	br_netpoll_disable(p);

L
Linus Torvalds 已提交
268 269 270
	call_rcu(&p->rcu, destroy_nbp_rcu);
}

271 272
/* Delete bridge device */
void br_dev_delete(struct net_device *dev, struct list_head *head)
L
Linus Torvalds 已提交
273
{
274
	struct net_bridge *br = netdev_priv(dev);
L
Linus Torvalds 已提交
275 276 277 278 279 280
	struct net_bridge_port *p, *n;

	list_for_each_entry_safe(p, n, &br->port_list, list) {
		del_nbp(p);
	}

281
	br_fdb_delete_by_port(br, NULL, 0, 1);
282

283
	br_vlan_flush(br);
284
	br_multicast_dev_del(br);
L
Linus Torvalds 已提交
285 286 287
	del_timer_sync(&br->gc_timer);

	br_sysfs_delbr(br->dev);
288
	unregister_netdevice_queue(br->dev, head);
L
Linus Torvalds 已提交
289 290 291 292 293 294 295 296 297
}

/* find an available port number */
static int find_portno(struct net_bridge *br)
{
	int index;
	struct net_bridge_port *p;
	unsigned long *inuse;

S
Stephen Hemminger 已提交
298
	inuse = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
L
Linus Torvalds 已提交
299 300 301 302 303 304 305 306 307 308 309 310 311 312
			GFP_KERNEL);
	if (!inuse)
		return -ENOMEM;

	set_bit(0, inuse);	/* zero is reserved */
	list_for_each_entry(p, &br->port_list, list) {
		set_bit(p->port_no, inuse);
	}
	index = find_first_zero_bit(inuse, BR_MAX_PORTS);
	kfree(inuse);

	return (index >= BR_MAX_PORTS) ? -EXFULL : index;
}

313
/* called with RTNL but without bridge lock */
314
static struct net_bridge_port *new_nbp(struct net_bridge *br,
315
				       struct net_device *dev)
L
Linus Torvalds 已提交
316 317 318
{
	int index;
	struct net_bridge_port *p;
319

L
Linus Torvalds 已提交
320 321 322 323
	index = find_portno(br);
	if (index < 0)
		return ERR_PTR(index);

S
Stephen Hemminger 已提交
324
	p = kzalloc(sizeof(*p), GFP_KERNEL);
L
Linus Torvalds 已提交
325 326 327 328 329 330
	if (p == NULL)
		return ERR_PTR(-ENOMEM);

	p->br = br;
	dev_hold(dev);
	p->dev = dev;
331
	p->path_cost = port_cost(dev);
332
	p->priority = 0x8000 >> BR_PORT_BITS;
L
Linus Torvalds 已提交
333
	p->port_no = index;
334
	p->flags = BR_LEARNING | BR_FLOOD;
L
Linus Torvalds 已提交
335
	br_init_port(p);
336
	br_set_state(p, BR_STATE_DISABLED);
337
	br_stp_port_timer_init(p);
338
	br_multicast_add_port(p);
L
Linus Torvalds 已提交
339 340 341 342

	return p;
}

343
int br_add_bridge(struct net *net, const char *name)
L
Linus Torvalds 已提交
344 345
{
	struct net_device *dev;
346
	int res;
L
Linus Torvalds 已提交
347

348
	dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN,
349 350
			   br_dev_setup);

351
	if (!dev)
L
Linus Torvalds 已提交
352 353
		return -ENOMEM;

354
	dev_net_set(dev, net);
355
	dev->rtnl_link_ops = &br_link_ops;
356

357 358 359 360
	res = register_netdev(dev);
	if (res)
		free_netdev(dev);
	return res;
L
Linus Torvalds 已提交
361 362
}

363
int br_del_bridge(struct net *net, const char *name)
L
Linus Torvalds 已提交
364 365 366 367 368
{
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();
369
	dev = __dev_get_by_name(net, name);
370
	if (dev == NULL)
L
Linus Torvalds 已提交
371 372 373 374 375 376 377 378 379 380
		ret =  -ENXIO; 	/* Could not find device */

	else if (!(dev->priv_flags & IFF_EBRIDGE)) {
		/* Attempt to delete non bridge device! */
		ret = -EPERM;
	}

	else if (dev->flags & IFF_UP) {
		/* Not shutdown yet. */
		ret = -EBUSY;
381
	}
L
Linus Torvalds 已提交
382

383
	else
384
		br_dev_delete(dev, NULL);
L
Linus Torvalds 已提交
385 386 387 388 389

	rtnl_unlock();
	return ret;
}

390
/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
L
Linus Torvalds 已提交
391 392 393 394 395 396 397 398
int br_min_mtu(const struct net_bridge *br)
{
	const struct net_bridge_port *p;
	int mtu = 0;

	ASSERT_RTNL();

	if (list_empty(&br->port_list))
399
		mtu = ETH_DATA_LEN;
L
Linus Torvalds 已提交
400 401 402 403 404 405 406 407 408
	else {
		list_for_each_entry(p, &br->port_list, list) {
			if (!mtu  || p->dev->mtu < mtu)
				mtu = p->dev->mtu;
		}
	}
	return mtu;
}

409 410 411
/*
 * Recomputes features using slave's features
 */
412 413
netdev_features_t br_features_recompute(struct net_bridge *br,
	netdev_features_t features)
414 415
{
	struct net_bridge_port *p;
416
	netdev_features_t mask;
417

418
	if (list_empty(&br->port_list))
419
		return features;
420

421
	mask = features;
422
	features &= ~NETIF_F_ONE_FOR_ALL;
423 424

	list_for_each_entry(p, &br->port_list, list) {
425 426
		features = netdev_increment_features(features,
						     p->dev->features, mask);
427
	}
428
	features = netdev_add_tso_features(features, mask);
429

430
	return features;
431 432
}

L
Linus Torvalds 已提交
433 434 435 436 437
/* called with RTNL */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *p;
	int err = 0;
438
	bool changed_addr;
L
Linus Torvalds 已提交
439

440 441 442 443 444 445
	/* Don't allow bridging non-ethernet like devices, or DSA-enabled
	 * master network devices since the bridge layer rx_handler prevents
	 * the DSA fake ethertype handler to be invoked, so we do not strip off
	 * the DSA switch tag protocol header and the bridge layer just return
	 * RX_HANDLER_CONSUMED, stopping RX processing for these frames.
	 */
446
	if ((dev->flags & IFF_LOOPBACK) ||
447
	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
448 449
	    !is_valid_ether_addr(dev->dev_addr) ||
	    netdev_uses_dsa(dev))
L
Linus Torvalds 已提交
450 451
		return -EINVAL;

452
	/* No bridging of bridges */
453
	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
L
Linus Torvalds 已提交
454 455
		return -ELOOP;

456
	/* Device is already being bridged */
457
	if (br_port_exists(dev))
L
Linus Torvalds 已提交
458 459
		return -EBUSY;

460 461 462 463
	/* No bridging devices that dislike that (e.g. wireless) */
	if (dev->priv_flags & IFF_DONT_BRIDGE)
		return -EOPNOTSUPP;

464 465
	p = new_nbp(br, dev);
	if (IS_ERR(p))
L
Linus Torvalds 已提交
466 467
		return PTR_ERR(p);

468 469
	call_netdevice_notifiers(NETDEV_JOIN, dev);

470
	err = dev_set_allmulti(dev, 1);
471 472 473
	if (err)
		goto put_back;

474 475
	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
				   SYSFS_BRIDGE_PORT_ATTR);
476 477
	if (err)
		goto err1;
L
Linus Torvalds 已提交
478

479 480 481
	err = br_sysfs_addif(p);
	if (err)
		goto err2;
L
Linus Torvalds 已提交
482

483
	err = br_netpoll_enable(p);
S
stephen hemminger 已提交
484
	if (err)
H
Herbert Xu 已提交
485 486
		goto err3;

487
	err = netdev_rx_handler_register(dev, br_handle_frame, p);
488
	if (err)
489
		goto err4;
490

491 492 493
	dev->priv_flags |= IFF_BRIDGE_PORT;

	err = netdev_master_upper_dev_link(dev, br->dev);
494
	if (err)
495
		goto err5;
496

497
	dev_disable_lro(dev);
498 499 500

	list_add_rcu(&p->list, &br->port_list);

501 502
	nbp_update_port_count(br);

503 504
	netdev_update_features(br->dev);

505 506 507
	if (br->dev->needed_headroom < dev->needed_headroom)
		br->dev->needed_headroom = dev->needed_headroom;

508 509 510
	if (br_fdb_insert(br, p, dev->dev_addr, 0))
		netdev_err(dev, "failed insert local address bridge forwarding table\n");

511 512 513
	if (nbp_vlan_init(p))
		netdev_err(dev, "failed to initialize vlan filtering on this port\n");

514
	spin_lock_bh(&br->lock);
515
	changed_addr = br_stp_recalculate_bridge_id(br);
516

517
	if (netif_running(dev) && netif_oper_up(dev) &&
518 519
	    (br->dev->flags & IFF_UP))
		br_stp_enable_port(p);
520 521
	spin_unlock_bh(&br->lock);

522 523
	br_ifinfo_notify(RTM_NEWLINK, p);

524
	if (changed_addr)
525
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
526

527
	dev_set_mtu(br->dev, br_min_mtu(br));
528

529
	kobject_uevent(&p->kobj, KOBJ_ADD);
L
Linus Torvalds 已提交
530

531
	return 0;
532

533
err5:
534 535
	dev->priv_flags &= ~IFF_BRIDGE_PORT;
	netdev_rx_handler_unregister(dev);
536 537
err4:
	br_netpoll_disable(p);
H
Herbert Xu 已提交
538 539
err3:
	sysfs_remove_link(br->ifobj, p->dev->name);
540
err2:
541
	kobject_put(&p->kobj);
542
	p = NULL; /* kobject_put frees */
543
err1:
544
	dev_set_allmulti(dev, -1);
545 546
put_back:
	dev_put(dev);
547
	kfree(p);
L
Linus Torvalds 已提交
548 549 550 551 552 553
	return err;
}

/* called with RTNL */
int br_del_if(struct net_bridge *br, struct net_device *dev)
{
554
	struct net_bridge_port *p;
555
	bool changed_addr;
556

557
	p = br_port_get_rtnl(dev);
558
	if (!p || p->br != br)
L
Linus Torvalds 已提交
559 560
		return -EINVAL;

561 562 563 564
	/* Since more than one interface can be attached to a bridge,
	 * there still maybe an alternate path for netconsole to use;
	 * therefore there is no reason for a NETDEV_RELEASE event.
	 */
L
Linus Torvalds 已提交
565 566
	del_nbp(p);

567 568
	dev_set_mtu(br->dev, br_min_mtu(br));

L
Linus Torvalds 已提交
569
	spin_lock_bh(&br->lock);
570
	changed_addr = br_stp_recalculate_bridge_id(br);
L
Linus Torvalds 已提交
571 572
	spin_unlock_bh(&br->lock);

573 574 575
	if (changed_addr)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);

576 577
	netdev_update_features(br->dev);

L
Linus Torvalds 已提交
578 579
	return 0;
}
580 581 582 583 584 585 586 587

/*
 * Called after port flags changed; @mask holds the changed bits.
 * When any bit of BR_AUTO_MASK is among them, the bridge's auto port
 * count is refreshed.
 */
void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
{
	struct net_bridge *bridge = p->br;

	if (!(mask & BR_AUTO_MASK))
		return;

	nbp_update_port_count(bridge);
}