/*
 *	Handle incoming frames
 *	Linux ethernet bridge
 *
 *	Authors:
 *	Lennert Buytenhek		<buytenh@gnu.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/netfilter_bridge.h>
#include <net/netfilter/nf_queue.h>
#include <linux/neighbour.h>
#include <net/arp.h>
#include <linux/export.h>
#include <linux/rculist.h>
#include "br_private.h"
#include "br_private_tunnel.h"

27 28 29 30
/* Hook for brouter */
br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
EXPORT_SYMBOL(br_should_route_hook);

/*
 * Final step of the NF_BR_LOCAL_IN hook run started in br_pass_frame_up():
 * call br_drop_fake_rtable() on the frame and hand it to
 * netif_receive_skb().
 *
 * @net and @sk are not used here; they are present because this function
 * is passed to NF_HOOK() as its completion callback.
 *
 * Returns the result of netif_receive_skb().
 */
static int
br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	br_drop_fake_rtable(skb);
	return netif_receive_skb(skb);
}

38
static int br_pass_frame_up(struct sk_buff *skb)
L
Linus Torvalds 已提交
39
{
40
	struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
41
	struct net_bridge *br = netdev_priv(brdev);
42
	struct net_bridge_vlan_group *vg;
43
	struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
L
Linus Torvalds 已提交
44

E
Eric Dumazet 已提交
45
	u64_stats_update_begin(&brstats->syncp);
46 47
	brstats->rx_packets++;
	brstats->rx_bytes += skb->len;
E
Eric Dumazet 已提交
48
	u64_stats_update_end(&brstats->syncp);
L
Linus Torvalds 已提交
49

50
	vg = br_vlan_group_rcu(br);
51 52 53 54 55
	/* Bridge is just like any other port.  Make sure the
	 * packet is allowed except in promisc modue when someone
	 * may be running packet capture.
	 */
	if (!(brdev->flags & IFF_PROMISC) &&
56
	    !br_allowed_egress(vg, skb)) {
57 58 59 60
		kfree_skb(skb);
		return NET_RX_DROP;
	}

L
Linus Torvalds 已提交
61
	indev = skb->dev;
62
	skb->dev = brdev;
63
	skb = br_handle_vlan(br, NULL, vg, skb);
64 65
	if (!skb)
		return NET_RX_DROP;
66
	/* update the multicast stats if the packet is IGMP/MLD */
67
	br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
68
			   BR_MCAST_DIR_TX);
L
Linus Torvalds 已提交
69

70 71
	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
		       dev_net(indev), NULL, skb, indev, NULL,
72
		       br_netif_receive_skb);
L
Linus Torvalds 已提交
73 74
}

75
/* note: already called with rcu_read_lock */
76
int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
L
Linus Torvalds 已提交
77
{
78
	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
79
	enum br_pkt_type pkt_type = BR_PKT_UNICAST;
80
	struct net_bridge_fdb_entry *dst = NULL;
81
	struct net_bridge_mdb_entry *mdst;
82
	bool local_rcv, mcast_hit = false;
83
	const unsigned char *dest;
84
	struct net_bridge *br;
85
	u16 vid = 0;
L
Linus Torvalds 已提交
86

87 88 89
	if (!p || p->state == BR_STATE_DISABLED)
		goto drop;

90
	if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
91
		goto out;
92

93 94
	nbp_switchdev_frame_mark(p, skb);

95
	/* insert into forwarding database after filtering to avoid spoofing */
96
	br = p->br;
97
	if (p->flags & BR_LEARNING)
98
		br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
99

100
	local_rcv = !!(br->dev->flags & IFF_PROMISC);
101
	dest = eth_hdr(skb)->h_dest;
102 103 104 105 106 107 108 109 110 111 112
	if (is_multicast_ether_addr(dest)) {
		/* by definition the broadcast is also a multicast address */
		if (is_broadcast_ether_addr(dest)) {
			pkt_type = BR_PKT_BROADCAST;
			local_rcv = true;
		} else {
			pkt_type = BR_PKT_MULTICAST;
			if (br_multicast_rcv(br, p, skb, vid))
				goto drop;
		}
	}
113

114 115
	if (p->state == BR_STATE_LEARNING)
		goto drop;
116

117
	BR_INPUT_SKB_CB(skb)->brdev = br->dev;
118
	BR_INPUT_SKB_CB(skb)->src_port_isolated = !!(p->flags & BR_ISOLATED);
119

120 121 122 123
	if (IS_ENABLED(CONFIG_INET) &&
	    (skb->protocol == htons(ETH_P_ARP) ||
	     skb->protocol == htons(ETH_P_RARP))) {
		br_do_proxy_suppress_arp(skb, br, vid, p);
124 125
	} else if (IS_ENABLED(CONFIG_IPV6) &&
		   skb->protocol == htons(ETH_P_IPV6) &&
126
		   br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
127 128 129 130 131 132 133 134
		   pskb_may_pull(skb, sizeof(struct ipv6hdr) +
				 sizeof(struct nd_msg)) &&
		   ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
			struct nd_msg *msg, _msg;

			msg = br_is_nd_neigh_msg(skb, &_msg);
			if (msg)
				br_do_suppress_nd(skb, br, vid, p, msg);
135
	}
136

137 138
	switch (pkt_type) {
	case BR_PKT_MULTICAST:
139
		mdst = br_mdb_get(br, skb, vid);
140
		if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
141
		    br_multicast_querier_exists(br, eth_hdr(skb))) {
142
			if ((mdst && mdst->host_joined) ||
143
			    br_multicast_is_router(br)) {
144
				local_rcv = true;
145 146 147
				br->dev->stats.multicast++;
			}
			mcast_hit = true;
148
		} else {
149
			local_rcv = true;
150
			br->dev->stats.multicast++;
151
		}
152 153
		break;
	case BR_PKT_UNICAST:
154
		dst = br_fdb_find_rcu(br, dest, vid);
155 156
	default:
		break;
L
Linus Torvalds 已提交
157 158
	}

159
	if (dst) {
160 161
		unsigned long now = jiffies;

162 163 164
		if (dst->is_local)
			return br_pass_frame_up(skb);

165 166
		if (now != dst->used)
			dst->used = now;
167
		br_forward(dst->dst, skb, local_rcv, false);
168 169
	} else {
		if (!mcast_hit)
170
			br_flood(br, skb, pkt_type, local_rcv, false);
171
		else
172
			br_multicast_flood(mdst, skb, local_rcv, false);
173
	}
L
Linus Torvalds 已提交
174

175 176
	if (local_rcv)
		return br_pass_frame_up(skb);
177

L
Linus Torvalds 已提交
178 179
out:
	return 0;
180 181 182
drop:
	kfree_skb(skb);
	goto out;
L
Linus Torvalds 已提交
183
}
184
EXPORT_SYMBOL_GPL(br_handle_frame_finish);
L
Linus Torvalds 已提交
185

186
static void __br_handle_local_finish(struct sk_buff *skb)
187
{
188
	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
189
	u16 vid = 0;
190

191
	/* check if vlan is allowed, to avoid spoofing */
192 193 194
	if ((p->flags & BR_LEARNING) &&
	    !br_opt_get(p->br, BROPT_NO_LL_LEARN) &&
	    br_should_learn(p, skb, &vid))
195
		br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
196 197 198 199 200 201 202 203
}

/* note: already called with rcu_read_lock */
static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_bridge_port *p = br_port_get_rcu(skb->dev);

	__br_handle_local_finish(skb);
204 205 206 207

	BR_INPUT_SKB_CB(skb)->brdev = p->br->dev;
	br_pass_frame_up(skb);
	return 0;
208 209
}

210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb)
{
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
	struct nf_hook_entries *e = NULL;
	struct nf_hook_state state;
	unsigned int verdict, i;
	struct net *net;
	int ret;

	net = dev_net(skb->dev);
#ifdef HAVE_JUMP_LABEL
	if (!static_key_false(&nf_hooks_needed[NFPROTO_BRIDGE][NF_BR_PRE_ROUTING]))
		goto frame_finish;
#endif

	e = rcu_dereference(net->nf.hooks_bridge[NF_BR_PRE_ROUTING]);
	if (!e)
		goto frame_finish;

	nf_hook_state_init(&state, NF_BR_PRE_ROUTING,
			   NFPROTO_BRIDGE, skb->dev, NULL, NULL,
			   net, br_handle_frame_finish);

	for (i = 0; i < e->num_hook_entries; i++) {
		verdict = nf_hook_entry_hookfn(&e->hooks[i], skb, &state);
		switch (verdict & NF_VERDICT_MASK) {
		case NF_ACCEPT:
			break;
		case NF_DROP:
			kfree_skb(skb);
			return RX_HANDLER_CONSUMED;
		case NF_QUEUE:
			ret = nf_queue(skb, &state, e, i, verdict);
			if (ret == 1)
				continue;
			return RX_HANDLER_CONSUMED;
		default: /* STOLEN */
			return RX_HANDLER_CONSUMED;
		}
	}
frame_finish:
	net = dev_net(skb->dev);
	br_handle_frame_finish(net, NULL, skb);
#else
	br_handle_frame_finish(dev_net(skb->dev), NULL, skb);
#endif
	return RX_HANDLER_CONSUMED;
}

L
Linus Torvalds 已提交
259
/*
260
 * Return NULL if skb is handled
261
 * note: already called with rcu_read_lock
L
Linus Torvalds 已提交
262
 */
263
rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
L
Linus Torvalds 已提交
264
{
265
	struct net_bridge_port *p;
266
	struct sk_buff *skb = *pskb;
L
Linus Torvalds 已提交
267
	const unsigned char *dest = eth_hdr(skb)->h_dest;
268
	br_should_route_hook_t *rhook;
L
Linus Torvalds 已提交
269

270
	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
271
		return RX_HANDLER_PASS;
272

L
Linus Torvalds 已提交
273
	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
274
		goto drop;
L
Linus Torvalds 已提交
275

H
Herbert Xu 已提交
276 277
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb)
278
		return RX_HANDLER_CONSUMED;
H
Herbert Xu 已提交
279

280 281
	memset(skb->cb, 0, sizeof(struct br_input_skb_cb));

282
	p = br_port_get_rcu(skb->dev);
283 284 285 286 287
	if (p->flags & BR_VLAN_TUNNEL) {
		if (br_handle_ingress_vlan_tunnel(skb, p,
						  nbp_vlan_group_rcu(p)))
			goto drop;
	}
288

289
	if (unlikely(is_link_local_ether_addr(dest))) {
290 291
		u16 fwd_mask = p->br->group_fwd_mask_required;

292 293 294 295 296 297 298 299 300 301 302 303 304
		/*
		 * See IEEE 802.1D Table 7-10 Reserved addresses
		 *
		 * Assignment		 		Value
		 * Bridge Group Address		01-80-C2-00-00-00
		 * (MAC Control) 802.3		01-80-C2-00-00-01
		 * (Link Aggregation) 802.3	01-80-C2-00-00-02
		 * 802.1X PAE address		01-80-C2-00-00-03
		 *
		 * 802.1AB LLDP 		01-80-C2-00-00-0E
		 *
		 * Others reserved for future standardization
		 */
305
		fwd_mask |= p->group_fwd_mask;
306 307 308 309
		switch (dest[5]) {
		case 0x00:	/* Bridge Group Address */
			/* If STP is turned off,
			   then must forward to keep loop detection */
310 311
			if (p->br->stp_enabled == BR_NO_STP ||
			    fwd_mask & (1u << dest[5]))
312
				goto forward;
313 314 315
			*pskb = skb;
			__br_handle_local_finish(skb);
			return RX_HANDLER_PASS;
316 317

		case 0x01:	/* IEEE MAC (Pause) */
S
Stephen Hemminger 已提交
318 319
			goto drop;

320 321 322 323 324 325 326 327
		case 0x0E:	/* 802.1AB LLDP */
			fwd_mask |= p->br->group_fwd_mask;
			if (fwd_mask & (1u << dest[5]))
				goto forward;
			*pskb = skb;
			__br_handle_local_finish(skb);
			return RX_HANDLER_PASS;

328 329
		default:
			/* Allow selective forwarding for most other protocols */
330 331
			fwd_mask |= p->br->group_fwd_mask;
			if (fwd_mask & (1u << dest[5]))
332 333
				goto forward;
		}
334

335
		/* Deliver packet to local host only */
336 337 338
		NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev),
			NULL, skb, skb->dev, NULL, br_handle_local_finish);
		return RX_HANDLER_CONSUMED;
S
Stephen Hemminger 已提交
339
	}
L
Linus Torvalds 已提交
340

341
forward:
342 343
	switch (p->state) {
	case BR_STATE_FORWARDING:
344
		rhook = rcu_dereference(br_should_route_hook);
345
		if (rhook) {
346 347 348 349
			if ((*rhook)(skb)) {
				*pskb = skb;
				return RX_HANDLER_PASS;
			}
L
Linus Torvalds 已提交
350 351
			dest = eth_hdr(skb)->h_dest;
		}
352 353
		/* fall through */
	case BR_STATE_LEARNING:
354
		if (ether_addr_equal(p->br->dev->dev_addr, dest))
L
Linus Torvalds 已提交
355 356
			skb->pkt_type = PACKET_HOST;

357
		return nf_hook_bridge_pre(skb, pskb);
358 359 360
	default:
drop:
		kfree_skb(skb);
L
Linus Torvalds 已提交
361
	}
362
	return RX_HANDLER_CONSUMED;
L
Linus Torvalds 已提交
363
}