br_if.c 12.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 *	Userspace interface
 *	Linux ethernet bridge
 *
 *	Authors:
 *	Lennert Buytenhek		<buytenh@gnu.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/netdevice.h>
16
#include <linux/etherdevice.h>
W
WANG Cong 已提交
17
#include <linux/netpoll.h>
L
Linus Torvalds 已提交
18 19 20 21 22
#include <linux/ethtool.h>
#include <linux/if_arp.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/rtnetlink.h>
23
#include <linux/if_ether.h>
24
#include <linux/slab.h>
L
Linus Torvalds 已提交
25
#include <net/sock.h>
26
#include <linux/if_vlan.h>
L
Linus Torvalds 已提交
27 28 29 30 31 32 33

#include "br_private.h"

/*
 * Determine initial path cost based on speed.
 * using recommendations from 802.1d standard
 *
34
 * Since driver might sleep need to not be holding any locks.
L
Linus Torvalds 已提交
35
 */
36
static int port_cost(struct net_device *dev)
L
Linus Torvalds 已提交
37
{
J
Jiri Pirko 已提交
38
	struct ethtool_cmd ecmd;
39

40
	if (!__ethtool_get_settings(dev, &ecmd)) {
J
Jiri Pirko 已提交
41 42 43 44 45 46 47 48 49
		switch (ethtool_cmd_speed(&ecmd)) {
		case SPEED_10000:
			return 2;
		case SPEED_1000:
			return 4;
		case SPEED_100:
			return 19;
		case SPEED_10:
			return 100;
L
Linus Torvalds 已提交
50 51 52 53 54 55 56 57 58 59 60 61 62
		}
	}

	/* Old silly heuristics based on name */
	if (!strncmp(dev->name, "lec", 3))
		return 7;

	if (!strncmp(dev->name, "plip", 4))
		return 2500;

	return 100;	/* assume old 10Mbps */
}

63

T
tanxiaojun 已提交
64
/* Check for port carrier transitions. */
65
void br_port_carrier_check(struct net_bridge_port *p)
66
{
67 68
	struct net_device *dev = p->dev;
	struct net_bridge *br = p->br;
S
Stephen Hemminger 已提交
69

70 71
	if (!(p->flags & BR_ADMIN_COST) &&
	    netif_running(dev) && netif_oper_up(dev))
S
Stephen Hemminger 已提交
72 73
		p->path_cost = port_cost(dev);

74 75 76 77
	if (!netif_running(br->dev))
		return;

	spin_lock_bh(&br->lock);
78
	if (netif_running(dev) && netif_oper_up(dev)) {
79 80 81 82 83
		if (p->state == BR_STATE_DISABLED)
			br_stp_enable_port(p);
	} else {
		if (p->state != BR_STATE_DISABLED)
			br_stp_disable_port(p);
84
	}
85
	spin_unlock_bh(&br->lock);
86 87
}

88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
/* Put a port into promiscuous mode, if it is not already.
 * On success the static fdb entries are unsynced from the port and
 * the port is flagged BR_PROMISC; on failure nothing is changed.
 */
static void br_port_set_promisc(struct net_bridge_port *p)
{
	if (br_promisc_port(p))
		return;

	if (dev_set_promiscuity(p->dev, 1))
		return;

	br_fdb_unsync_static(p->br, p);
	p->flags |= BR_PROMISC;
}

/* Take a port out of promiscuous mode when that is possible. */
static void br_port_clear_promisc(struct net_bridge_port *p)
{
	/* Nothing to do if the port is already non-promisc. */
	if (!br_promisc_port(p))
		return;

	/* Without unicast filtering support we'd end up re-enabling
	 * promisc mode anyway, so bail out here.
	 */
	if (!(p->dev->priv_flags & IFF_UNICAST_FLT))
		return;

	/* Program the port with the static entries before clearing
	 * promisc mode so that traffic is not interrupted.
	 */
	if (br_fdb_sync_static(p->br, p))
		return;

	dev_set_promiscuity(p->dev, -1);
	p->flags &= ~BR_PROMISC;
}

/* Called when a port is added or removed, or when certain port flags
 * change, to automatically manage the promiscuity setting of all the
 * bridge ports.  We are always called under RTNL so can skip using
 * rcu primitives.
 */
void br_manage_promisc(struct net_bridge *br)
{
	struct net_bridge_port *p;
	bool set_all;

	/* If vlan filtering is disabled or the bridge interface is placed
	 * into promiscuous mode, place all ports in promiscuous mode.
	 */
	set_all = (br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br);

	list_for_each_entry(p, &br->port_list, list) {
		/* If the number of auto-ports is <= 1, then all other
		 * ports will have their output configuration statically
		 * specified through fdbs.  Since ingress on the auto-port
		 * becomes forwarding/egress to other ports and egress
		 * configuration is statically known, the ingress
		 * configuration of the auto-port is also statically known.
		 * This lets us disable promiscuous mode and write this
		 * config to hw.
		 */
		bool can_clear = !set_all &&
				 (br->auto_cnt == 0 ||
				  (br->auto_cnt == 1 && br_auto_port(p)));

		if (can_clear)
			br_port_clear_promisc(p);
		else
			br_port_set_promisc(p);
	}
}

165 166 167 168 169 170 171 172 173
static void nbp_update_port_count(struct net_bridge *br)
{
	struct net_bridge_port *p;
	u32 cnt = 0;

	list_for_each_entry(p, &br->port_list, list) {
		if (br_auto_port(p))
			cnt++;
	}
174 175 176 177 178 179 180 181
	if (br->auto_cnt != cnt) {
		br->auto_cnt = cnt;
		br_manage_promisc(br);
	}
}

/* Undo the allmulti/promiscuity/static-address state of a port
 * that is being deleted.
 */
static void nbp_delete_promisc(struct net_bridge_port *p)
{
	dev_set_allmulti(p->dev, -1);

	/* If port is currently promiscuous, unset promiscuity.
	 * Otherwise, it is a static port so remove all addresses
	 * from it.
	 */
	if (!br_promisc_port(p)) {
		br_fdb_unsync_static(p->br, p);
		return;
	}

	dev_set_promiscuity(p->dev, -1);
}

193 194 195 196 197 198 199 200 201 202 203 204 205 206
/* kobject release callback: free the bridge port embedding @kobj. */
static void release_nbp(struct kobject *kobj)
{
	kfree(container_of(kobj, struct net_bridge_port, kobj));
}

/* kobject type used for bridge ports: release_nbp() is the release
 * callback, and the sysfs operations are only wired up when the
 * kernel is built with CONFIG_SYSFS.
 */
static struct kobj_type brport_ktype = {
#ifdef CONFIG_SYSFS
	.sysfs_ops = &brport_sysfs_ops,
#endif
	.release = release_nbp,
};

L
Linus Torvalds 已提交
207 208 209 210 211 212 213 214
/* Final teardown of a port: detach it from its bridge and device,
 * drop the device reference, then drop the port's kobject reference.
 */
static void destroy_nbp(struct net_bridge_port *p)
{
	struct net_device *port_dev = p->dev;

	p->br = NULL;
	p->dev = NULL;
	dev_put(port_dev);

	kobject_put(&p->kobj);
}

/* RCU callback: destroy the port that embeds @head. */
static void destroy_nbp_rcu(struct rcu_head *head)
{
	destroy_nbp(container_of(head, struct net_bridge_port, rcu));
}

225 226 227 228 229 230 231 232 233
/* Delete port(interface) from bridge is done in two steps.
 * via RCU. First step, marks device as down. That deletes
 * all the timers and stops new packets from flowing through.
 *
 * Final cleanup doesn't occur until after all CPU's finished
 * processing packets.
 *
 * Protected from multiple admin operations by RTNL mutex
 */
L
Linus Torvalds 已提交
234 235 236 237 238
static void del_nbp(struct net_bridge_port *p)
{
	struct net_bridge *br = p->br;
	struct net_device *dev = p->dev;

239
	sysfs_remove_link(br->ifobj, p->dev->name);
240

241
	nbp_delete_promisc(p);
L
Linus Torvalds 已提交
242 243 244 245 246

	spin_lock_bh(&br->lock);
	br_stp_disable_port(p);
	spin_unlock_bh(&br->lock);

247 248
	br_ifinfo_notify(RTM_DELLINK, p);

L
Linus Torvalds 已提交
249 250
	list_del_rcu(&p->list);

251 252
	nbp_vlan_flush(p);
	br_fdb_delete_by_port(br, p, 1);
253 254
	nbp_update_port_count(br);

255 256
	dev->priv_flags &= ~IFF_BRIDGE_PORT;

257
	netdev_rx_handler_unregister(dev);
258

259
	netdev_upper_dev_unlink(dev, br->dev);
260

261 262
	br_multicast_del_port(p);

263
	kobject_uevent(&p->kobj, KOBJ_REMOVE);
264 265
	kobject_del(&p->kobj);

H
Herbert Xu 已提交
266 267
	br_netpoll_disable(p);

L
Linus Torvalds 已提交
268 269 270
	call_rcu(&p->rcu, destroy_nbp_rcu);
}

271 272
/* Delete bridge device */
void br_dev_delete(struct net_device *dev, struct list_head *head)
L
Linus Torvalds 已提交
273
{
274
	struct net_bridge *br = netdev_priv(dev);
L
Linus Torvalds 已提交
275 276 277 278 279 280
	struct net_bridge_port *p, *n;

	list_for_each_entry_safe(p, n, &br->port_list, list) {
		del_nbp(p);
	}

281 282
	br_fdb_delete_by_port(br, NULL, 1);

283
	br_vlan_flush(br);
L
Linus Torvalds 已提交
284 285 286
	del_timer_sync(&br->gc_timer);

	br_sysfs_delbr(br->dev);
287
	unregister_netdevice_queue(br->dev, head);
L
Linus Torvalds 已提交
288 289 290 291 292 293 294 295 296
}

/* find an available port number */
static int find_portno(struct net_bridge *br)
{
	int index;
	struct net_bridge_port *p;
	unsigned long *inuse;

S
Stephen Hemminger 已提交
297
	inuse = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
L
Linus Torvalds 已提交
298 299 300 301 302 303 304 305 306 307 308 309 310 311
			GFP_KERNEL);
	if (!inuse)
		return -ENOMEM;

	set_bit(0, inuse);	/* zero is reserved */
	list_for_each_entry(p, &br->port_list, list) {
		set_bit(p->port_no, inuse);
	}
	index = find_first_zero_bit(inuse, BR_MAX_PORTS);
	kfree(inuse);

	return (index >= BR_MAX_PORTS) ? -EXFULL : index;
}

312
/* called with RTNL but without bridge lock */
313
static struct net_bridge_port *new_nbp(struct net_bridge *br,
314
				       struct net_device *dev)
L
Linus Torvalds 已提交
315 316 317
{
	int index;
	struct net_bridge_port *p;
318

L
Linus Torvalds 已提交
319 320 321 322
	index = find_portno(br);
	if (index < 0)
		return ERR_PTR(index);

S
Stephen Hemminger 已提交
323
	p = kzalloc(sizeof(*p), GFP_KERNEL);
L
Linus Torvalds 已提交
324 325 326 327 328 329
	if (p == NULL)
		return ERR_PTR(-ENOMEM);

	p->br = br;
	dev_hold(dev);
	p->dev = dev;
330
	p->path_cost = port_cost(dev);
331
	p->priority = 0x8000 >> BR_PORT_BITS;
L
Linus Torvalds 已提交
332
	p->port_no = index;
333
	p->flags = BR_LEARNING | BR_FLOOD;
L
Linus Torvalds 已提交
334 335
	br_init_port(p);
	p->state = BR_STATE_DISABLED;
336
	br_stp_port_timer_init(p);
337
	br_multicast_add_port(p);
L
Linus Torvalds 已提交
338 339 340 341

	return p;
}

342
int br_add_bridge(struct net *net, const char *name)
L
Linus Torvalds 已提交
343 344
{
	struct net_device *dev;
345
	int res;
L
Linus Torvalds 已提交
346

347 348 349
	dev = alloc_netdev(sizeof(struct net_bridge), name,
			   br_dev_setup);

350
	if (!dev)
L
Linus Torvalds 已提交
351 352
		return -ENOMEM;

353
	dev_net_set(dev, net);
354
	dev->rtnl_link_ops = &br_link_ops;
355

356 357 358 359
	res = register_netdev(dev);
	if (res)
		free_netdev(dev);
	return res;
L
Linus Torvalds 已提交
360 361
}

362
int br_del_bridge(struct net *net, const char *name)
L
Linus Torvalds 已提交
363 364 365 366 367
{
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();
368
	dev = __dev_get_by_name(net, name);
369
	if (dev == NULL)
L
Linus Torvalds 已提交
370 371 372 373 374 375 376 377 378 379
		ret =  -ENXIO; 	/* Could not find device */

	else if (!(dev->priv_flags & IFF_EBRIDGE)) {
		/* Attempt to delete non bridge device! */
		ret = -EPERM;
	}

	else if (dev->flags & IFF_UP) {
		/* Not shutdown yet. */
		ret = -EBUSY;
380
	}
L
Linus Torvalds 已提交
381

382
	else
383
		br_dev_delete(dev, NULL);
L
Linus Torvalds 已提交
384 385 386 387 388

	rtnl_unlock();
	return ret;
}

389
/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
L
Linus Torvalds 已提交
390 391 392 393 394 395 396 397
int br_min_mtu(const struct net_bridge *br)
{
	const struct net_bridge_port *p;
	int mtu = 0;

	ASSERT_RTNL();

	if (list_empty(&br->port_list))
398
		mtu = ETH_DATA_LEN;
L
Linus Torvalds 已提交
399 400 401 402 403 404 405 406 407
	else {
		list_for_each_entry(p, &br->port_list, list) {
			if (!mtu  || p->dev->mtu < mtu)
				mtu = p->dev->mtu;
		}
	}
	return mtu;
}

408 409 410
/*
 * Recomputes features using slave's features
 */
411 412
netdev_features_t br_features_recompute(struct net_bridge *br,
	netdev_features_t features)
413 414
{
	struct net_bridge_port *p;
415
	netdev_features_t mask;
416

417
	if (list_empty(&br->port_list))
418
		return features;
419

420
	mask = features;
421
	features &= ~NETIF_F_ONE_FOR_ALL;
422 423

	list_for_each_entry(p, &br->port_list, list) {
424 425
		features = netdev_increment_features(features,
						     p->dev->features, mask);
426 427
	}

428
	return features;
429 430
}

L
Linus Torvalds 已提交
431 432 433 434 435
/* called with RTNL */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *p;
	int err = 0;
436
	bool changed_addr;
L
Linus Torvalds 已提交
437

438 439
	/* Don't allow bridging non-ethernet like devices */
	if ((dev->flags & IFF_LOOPBACK) ||
440 441
	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
	    !is_valid_ether_addr(dev->dev_addr))
L
Linus Torvalds 已提交
442 443
		return -EINVAL;

444
	/* No bridging of bridges */
445
	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
L
Linus Torvalds 已提交
446 447
		return -ELOOP;

448
	/* Device is already being bridged */
449
	if (br_port_exists(dev))
L
Linus Torvalds 已提交
450 451
		return -EBUSY;

452 453 454 455
	/* No bridging devices that dislike that (e.g. wireless) */
	if (dev->priv_flags & IFF_DONT_BRIDGE)
		return -EOPNOTSUPP;

456 457
	p = new_nbp(br, dev);
	if (IS_ERR(p))
L
Linus Torvalds 已提交
458 459
		return PTR_ERR(p);

460 461
	call_netdevice_notifiers(NETDEV_JOIN, dev);

462
	err = dev_set_allmulti(dev, 1);
463 464 465
	if (err)
		goto put_back;

466 467
	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
				   SYSFS_BRIDGE_PORT_ATTR);
468 469
	if (err)
		goto err1;
L
Linus Torvalds 已提交
470

471 472 473
	err = br_sysfs_addif(p);
	if (err)
		goto err2;
L
Linus Torvalds 已提交
474

475
	err = br_netpoll_enable(p);
S
stephen hemminger 已提交
476
	if (err)
H
Herbert Xu 已提交
477 478
		goto err3;

479
	err = netdev_master_upper_dev_link(dev, br->dev);
480
	if (err)
481
		goto err4;
482

483 484
	err = netdev_rx_handler_register(dev, br_handle_frame, p);
	if (err)
485
		goto err5;
486

487
	dev->priv_flags |= IFF_BRIDGE_PORT;
488

489
	dev_disable_lro(dev);
490 491 492

	list_add_rcu(&p->list, &br->port_list);

493 494
	nbp_update_port_count(br);

495 496
	netdev_update_features(br->dev);

497 498 499
	if (br->dev->needed_headroom < dev->needed_headroom)
		br->dev->needed_headroom = dev->needed_headroom;

500 501 502
	if (br_fdb_insert(br, p, dev->dev_addr, 0))
		netdev_err(dev, "failed insert local address bridge forwarding table\n");

503
	spin_lock_bh(&br->lock);
504
	changed_addr = br_stp_recalculate_bridge_id(br);
505

506
	if (netif_running(dev) && netif_oper_up(dev) &&
507 508
	    (br->dev->flags & IFF_UP))
		br_stp_enable_port(p);
509 510
	spin_unlock_bh(&br->lock);

511 512
	br_ifinfo_notify(RTM_NEWLINK, p);

513
	if (changed_addr)
514
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
515

516
	dev_set_mtu(br->dev, br_min_mtu(br));
517

518
	kobject_uevent(&p->kobj, KOBJ_ADD);
L
Linus Torvalds 已提交
519

520
	return 0;
521

522
err5:
523
	netdev_upper_dev_unlink(dev, br->dev);
524 525
err4:
	br_netpoll_disable(p);
H
Herbert Xu 已提交
526 527
err3:
	sysfs_remove_link(br->ifobj, p->dev->name);
528
err2:
529
	kobject_put(&p->kobj);
530
	p = NULL; /* kobject_put frees */
531
err1:
532
	dev_set_allmulti(dev, -1);
533 534
put_back:
	dev_put(dev);
535
	kfree(p);
L
Linus Torvalds 已提交
536 537 538 539 540 541
	return err;
}

/* called with RTNL */
int br_del_if(struct net_bridge *br, struct net_device *dev)
{
542
	struct net_bridge_port *p;
543
	bool changed_addr;
544

545
	p = br_port_get_rtnl(dev);
546
	if (!p || p->br != br)
L
Linus Torvalds 已提交
547 548
		return -EINVAL;

549 550 551 552
	/* Since more than one interface can be attached to a bridge,
	 * there still maybe an alternate path for netconsole to use;
	 * therefore there is no reason for a NETDEV_RELEASE event.
	 */
L
Linus Torvalds 已提交
553 554 555
	del_nbp(p);

	spin_lock_bh(&br->lock);
556
	changed_addr = br_stp_recalculate_bridge_id(br);
L
Linus Torvalds 已提交
557 558
	spin_unlock_bh(&br->lock);

559 560 561
	if (changed_addr)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);

562 563
	netdev_update_features(br->dev);

L
Linus Torvalds 已提交
564 565
	return 0;
}
566 567 568 569 570 571 572 573

/* Called after the flags selected by @mask changed on port @p:
 * if any of them is in BR_AUTO_MASK, recount the bridge's auto ports.
 */
void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
{
	if (!(mask & BR_AUTO_MASK))
		return;

	nbp_update_port_count(p->br);
}