br_netfilter.c 26.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
/*
 *	Handle firewalling
 *	Linux ethernet bridge
 *
 *	Authors:
 *	Lennert Buytenhek               <buytenh@gnu.org>
 *	Bart De Schuymer (maintainer)	<bdschuym@pandora.be>
 *
 *	Changes:
 *	Apr 29 2003: physdev module support (bdschuym)
 *	Jun 19 2003: let arptables see bridged ARP traffic (bdschuym)
 *	Oct 06 2003: filter encapsulated IP/ARP VLAN traffic on untagged bridge
 *	             (bdschuym)
 *	Sep 01 2004: add IPv6 filtering (bdschuym)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Lennert dedicates this file to Kerstin Wurdinger.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
29
#include <linux/if_arp.h>
L
Linus Torvalds 已提交
30 31
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
32 33
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
L
Linus Torvalds 已提交
34 35 36 37 38
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_arp.h>
#include <linux/in_route.h>
39
#include <linux/inetdevice.h>
40

L
Linus Torvalds 已提交
41 42
#include <net/ip.h>
#include <net/ipv6.h>
43 44
#include <net/route.h>

L
Linus Torvalds 已提交
45 46 47 48 49 50 51 52
#include <asm/uaccess.h>
#include "br_private.h"
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

/*
 * Helpers for detecting IPv4 DNAT across the PRE_ROUTING hook.
 *
 * skb_origaddr()       - lvalue for the IPv4 address slot kept in the
 *                        bridge_skb_cb overlay of skb->nf_bridge->data.
 * store_orig_dstaddr() - remember the current IPv4 destination address.
 * dnat_took_place()    - true when the current destination differs from
 *                        the remembered one.
 *
 * The macro argument is parenthesized so that any pointer expression can
 * be passed safely.
 */
#define skb_origaddr(skb)	 (((struct bridge_skb_cb *) \
				 ((skb)->nf_bridge->data))->daddr.ipv4)
#define store_orig_dstaddr(skb)	 (skb_origaddr(skb) = ip_hdr(skb)->daddr)
#define dnat_took_place(skb)	 (skb_origaddr(skb) != ip_hdr(skb)->daddr)
L
Linus Torvalds 已提交
55 56 57

#ifdef CONFIG_SYSCTL
/* Tagged-traffic filters: consulted by the IS_VLAN_xxx / IS_PPPOE_xxx macros. */
static int brnf_filter_vlan_tagged __read_mostly = 1;
static int brnf_filter_pppoe_tagged __read_mostly = 1;

/* Per-family switches: when zero, the matching hook accepts without filtering. */
static int brnf_call_arptables __read_mostly = 1;
static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_iptables __read_mostly = 1;

/* Handle for the sysctl table registered in br_netfilter_init(). */
static struct ctl_table_header *brnf_sysctl_header;
#else
/* Without sysctl support tagged-traffic filtering is always enabled. */
#define brnf_filter_vlan_tagged 1
#define brnf_filter_pppoe_tagged 1
#endif

D
Dave Jones 已提交
68
static inline __be16 vlan_proto(const struct sk_buff *skb)
69 70 71 72 73 74
{
	return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
}

/*
 * True when the frame is 802.1Q tagged, carries the given encapsulated
 * protocol, and VLAN-tagged filtering is enabled.  The tests are evaluated
 * in that order, so the VLAN header is only read for 802.1Q frames.
 */
#define BRNF_VLAN_CARRIES(skb, proto) \
	((skb)->protocol == htons(ETH_P_8021Q) && \
	 vlan_proto(skb) == htons(proto) && \
	 brnf_filter_vlan_tagged)

#define IS_VLAN_IP(skb)		BRNF_VLAN_CARRIES(skb, ETH_P_IP)

#define IS_VLAN_IPV6(skb)	BRNF_VLAN_CARRIES(skb, ETH_P_IPV6)

#define IS_VLAN_ARP(skb)	BRNF_VLAN_CARRIES(skb, ETH_P_ARP)
L
Linus Torvalds 已提交
87

88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
static inline __be16 pppoe_proto(const struct sk_buff *skb)
{
	return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
			    sizeof(struct pppoe_hdr)));
}

/*
 * True when the frame is a PPPoE session frame, carries the given PPP
 * protocol, and PPPoE-tagged filtering is enabled (tested in that order).
 */
#define BRNF_PPPOE_CARRIES(skb, ppp_proto) \
	((skb)->protocol == htons(ETH_P_PPP_SES) && \
	 pppoe_proto(skb) == htons(ppp_proto) && \
	 brnf_filter_pppoe_tagged)

#define IS_PPPOE_IP(skb)	BRNF_PPPOE_CARRIES(skb, PPP_IP)

#define IS_PPPOE_IPV6(skb)	BRNF_PPPOE_CARRIES(skb, PPP_IPV6)

L
Linus Torvalds 已提交
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
/*
 * Placeholder device and route attached to bridged packets.
 *
 * Many netfilter code paths assume skb->dst is non-NULL, so bridged
 * packets get this fake route.  The MTU metric is set because netfilter
 * refragmentation needs it, and rt_flags because ipt_REJECT reads it.
 * Future netfilter modules may need more fields filled in.
 */
static struct net_device __fake_net_device = {
	.hard_header_len = ETH_HLEN
};

static struct rtable __fake_rtable = {
	.u.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.dev		= &__fake_net_device,
		/* the fake dst is its own path */
		.path		= &__fake_rtable.u.dst,
		.metrics	= { [RTAX_MTU - 1] = 1500 },
		.flags		= DST_NOXFRM,
	},
	.rt_flags = 0,
};

129 130 131 132 133 134
/*
 * Return the bridge device that @dev is a port of, or NULL when @dev has
 * no bridge port attached.  The port pointer is read via rcu_dereference().
 */
static inline struct net_device *bridge_parent(const struct net_device *dev)
{
	struct net_bridge_port *p = rcu_dereference(dev->br_port);

	if (!p)
		return NULL;

	return p->br->dev;
}
L
Linus Torvalds 已提交
135

136 137 138 139 140 141 142 143 144
static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
{
	skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC);
	if (likely(skb->nf_bridge))
		atomic_set(&(skb->nf_bridge->use), 1);

	return skb->nf_bridge;
}

145 146 147 148 149 150 151 152 153
/*
 * Re-expose the encapsulation header in front of the payload: grow the
 * data area by its length and move the network header offset back by the
 * same amount.
 */
static inline void nf_bridge_push_encap_header(struct sk_buff *skb)
{
	unsigned int encap_len = nf_bridge_encap_header_len(skb);

	skb_push(skb, encap_len);
	skb->network_header -= encap_len;
}

static inline void nf_bridge_pull_encap_header(struct sk_buff *skb)
154
{
155 156 157 158 159
	unsigned int len = nf_bridge_encap_header_len(skb);

	skb_pull(skb, len);
	skb->network_header += len;
}
160

161 162 163 164 165 166 167 168 169 170 171
/*
 * Same as nf_bridge_pull_encap_header(), but pulls with skb_pull_rcsum()
 * instead of skb_pull().
 */
static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
{
	unsigned int encap_len = nf_bridge_encap_header_len(skb);

	skb_pull_rcsum(skb, encap_len);
	skb->network_header += encap_len;
}

static inline void nf_bridge_save_header(struct sk_buff *skb)
{
	int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
172

173 174
	skb_copy_from_linear_data_offset(skb, -header_size,
					 skb->nf_bridge->data, header_size);
175 176
}

177 178 179 180 181 182 183
/*
 * When forwarding bridge frames, we save a copy of the original
 * header before processing.
 */
int nf_bridge_copy_header(struct sk_buff *skb)
{
	int err;
184
	int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
185

H
Herbert Xu 已提交
186
	err = skb_cow_head(skb, header_size);
187 188 189
	if (err)
		return err;

190 191
	skb_copy_to_linear_data_offset(skb, -header_size,
				       skb->nf_bridge->data, header_size);
192
	__skb_push(skb, nf_bridge_encap_header_len(skb));
193 194 195
	return 0;
}

L
Linus Torvalds 已提交
196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
/* PF_BRIDGE/PRE_ROUTING *********************************************/
/* Undo the changes made for ip6tables PREROUTING and continue the
 * bridge PRE_ROUTING hook. */
static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
{
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;

	if (nf_bridge->mask & BRNF_PKT_TYPE) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->mask ^= BRNF_PKT_TYPE;
	}
	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;

	skb->dst = (struct dst_entry *)&__fake_rtable;
	dst_hold(skb->dst);

	skb->dev = nf_bridge->physindev;
213
	nf_bridge_push_encap_header(skb);
L
Linus Torvalds 已提交
214 215 216 217 218 219 220 221 222 223 224 225
	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
		       br_handle_frame_finish, 1);

	return 0;
}

/*
 * Warn that cross-bridge DNAT needs IP forwarding.  The message is
 * printed only when at least 5 seconds' worth of jiffies have passed
 * since the previous one.
 */
static void __br_dnat_complain(void)
{
	static unsigned long last_complaint;

	if (jiffies - last_complaint < 5 * HZ)
		return;

	printk(KERN_WARNING "Performing cross-bridge DNAT requires IP "
	       "forwarding to be enabled\n");
	last_complaint = jiffies;
}

/* This requires some explaining. If DNAT has taken place,
 * we will need to fix up the destination Ethernet address,
 * and this is a tricky process.
 *
 * There are two cases to consider:
 * 1. The packet was DNAT'ed to a device in the same bridge
 *    port group as it was received on. We can still bridge
 *    the packet.
 * 2. The packet was DNAT'ed to a different device, either
 *    a non-bridged device or another bridge port group.
 *    The packet will need to be routed.
 *
 * The correct way of distinguishing between these two cases is to
 * call ip_route_input() and to look at skb->dst->dev, which is
 * changed to the destination device if ip_route_input() succeeds.
 *
 * Let us first consider the case that ip_route_input() succeeds:
 *
 * If skb->dst->dev equals the logical bridge device the packet
250 251 252
 * came in on, we can consider this bridging. The packet is passed
 * through the neighbour output function to build a new destination
 * MAC address, which will make the packet enter br_nf_local_out()
L
Linus Torvalds 已提交
253 254 255 256 257
 * not much later. In that function it is assured that the iptables
 * FORWARD chain is traversed for the packet.
 *
 * Otherwise, the packet is considered to be routed and we just
 * change the destination MAC address so that the packet will
258 259 260
 * later be passed up to the IP stack to be routed. For a redirected
 * packet, ip_route_input() will give back the localhost as output device,
 * which differs from the bridge device.
L
Linus Torvalds 已提交
261 262 263
 *
 * Let us now consider the case that ip_route_input() fails:
 *
264 265
 * This can be because the destination address is martian, in which case
 * the packet will be dropped.
L
Linus Torvalds 已提交
266 267 268 269 270 271 272 273 274 275 276 277
 * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input()
 * will fail, while __ip_route_output_key() will return success. The source
 * address for __ip_route_output_key() is set to zero, so __ip_route_output_key
 * thinks we're handling a locally generated packet and won't care
 * if IP forwarding is allowed. We send a warning message to the user's
 * log asking them to turn IP forwarding on.
 *
 * ip_route_input() will also fail if there is no route available.
 * In that case we just drop the packet.
 *
 * --Lennert, 20020411
 * --Bart, 20020416 (updated)
278 279
 * --Bart, 20021007 (updated)
 * --Bart, 20062711 (updated) */
L
Linus Torvalds 已提交
280 281 282 283 284 285 286 287 288
static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
{
	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		skb->nf_bridge->mask |= BRNF_PKT_TYPE;
	}
	skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;

	skb->dev = bridge_parent(skb->dev);
289 290 291
	if (skb->dev) {
		struct dst_entry *dst = skb->dst;

292
		nf_bridge_pull_encap_header(skb);
293 294 295 296 297

		if (dst->hh)
			return neigh_hh_output(dst->hh, skb);
		else if (dst->neighbour)
			return dst->neighbour->output(skb);
L
Linus Torvalds 已提交
298
	}
299
	kfree_skb(skb);
L
Linus Torvalds 已提交
300 301 302 303 304 305
	return 0;
}

static int br_nf_pre_routing_finish(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
306
	struct iphdr *iph = ip_hdr(skb);
L
Linus Torvalds 已提交
307
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
308
	int err;
L
Linus Torvalds 已提交
309 310 311 312 313 314 315

	if (nf_bridge->mask & BRNF_PKT_TYPE) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->mask ^= BRNF_PKT_TYPE;
	}
	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
	if (dnat_took_place(skb)) {
316
		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
L
Linus Torvalds 已提交
317
			struct rtable *rt;
318 319 320 321 322 323 324 325 326
			struct flowi fl = {
				.nl_u = {
					.ip4_u = {
						 .daddr = iph->daddr,
						 .saddr = 0,
						 .tos = RT_TOS(iph->tos) },
				},
				.proto = 0,
			};
327 328 329 330 331 332 333 334 335 336 337
			struct in_device *in_dev = in_dev_get(dev);

			/* If err equals -EHOSTUNREACH the error is due to a
			 * martian destination or due to the fact that
			 * forwarding is disabled. For most martian packets,
			 * ip_route_output_key() will fail. It won't fail for 2 types of
			 * martian destinations: loopback destinations and destination
			 * 0.0.0.0. In both cases the packet will be dropped because the
			 * destination is the loopback device and not the bridge. */
			if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
				goto free_skb;
L
Linus Torvalds 已提交
338 339

			if (!ip_route_output_key(&rt, &fl)) {
340
				/* - Bridged-and-DNAT'ed traffic doesn't
341 342
				 *   require ip_forwarding. */
				if (((struct dst_entry *)rt)->dev == dev) {
L
Linus Torvalds 已提交
343 344 345
					skb->dst = (struct dst_entry *)rt;
					goto bridged_dnat;
				}
346 347 348 349
				/* we are sure that forwarding is disabled, so printing
				 * this message is no problem. Note that the packet could
				 * still have a martian destination address, in which case
				 * the packet could be dropped even if forwarding were enabled */
L
Linus Torvalds 已提交
350 351 352
				__br_dnat_complain();
				dst_release((struct dst_entry *)rt);
			}
353
free_skb:
L
Linus Torvalds 已提交
354 355 356 357 358 359 360 361 362
			kfree_skb(skb);
			return 0;
		} else {
			if (skb->dst->dev == dev) {
bridged_dnat:
				/* Tell br_nf_local_out this is a
				 * bridged frame */
				nf_bridge->mask |= BRNF_BRIDGED_DNAT;
				skb->dev = nf_bridge->physindev;
363
				nf_bridge_push_encap_header(skb);
L
Linus Torvalds 已提交
364 365 366 367 368 369
				NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
					       skb, skb->dev, NULL,
					       br_nf_pre_routing_finish_bridge,
					       1);
				return 0;
			}
370
			memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, ETH_ALEN);
L
Linus Torvalds 已提交
371 372 373 374 375 376 377 378
			skb->pkt_type = PACKET_HOST;
		}
	} else {
		skb->dst = (struct dst_entry *)&__fake_rtable;
		dst_hold(skb->dst);
	}

	skb->dev = nf_bridge->physindev;
379
	nf_bridge_push_encap_header(skb);
L
Linus Torvalds 已提交
380 381 382 383 384 385 386
	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
		       br_handle_frame_finish, 1);

	return 0;
}

/* Some common code for IPv4/IPv6 */
387
static struct net_device *setup_pre_routing(struct sk_buff *skb)
L
Linus Torvalds 已提交
388 389 390 391 392 393 394 395 396 397 398
{
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;

	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		nf_bridge->mask |= BRNF_PKT_TYPE;
	}

	nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
	nf_bridge->physindev = skb->dev;
	skb->dev = bridge_parent(skb->dev);
399 400

	return skb->dev;
L
Linus Torvalds 已提交
401 402 403 404 405
}

/* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */
static int check_hbh_len(struct sk_buff *skb)
{
406
	unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
L
Linus Torvalds 已提交
407
	u32 pkt_len;
408 409
	const unsigned char *nh = skb_network_header(skb);
	int off = raw - nh;
410
	int len = (raw[1] + 1) << 3;
L
Linus Torvalds 已提交
411 412 413 414 415 416 417 418

	if ((raw + len) - skb->data > skb_headlen(skb))
		goto bad;

	off += 2;
	len -= 2;

	while (len > 0) {
419
		int optlen = nh[off + 1] + 2;
L
Linus Torvalds 已提交
420

421
		switch (nh[off]) {
L
Linus Torvalds 已提交
422 423 424 425 426 427 428 429
		case IPV6_TLV_PAD0:
			optlen = 1;
			break;

		case IPV6_TLV_PADN:
			break;

		case IPV6_TLV_JUMBO:
430
			if (nh[off + 1] != 4 || (off & 3) != 2)
L
Linus Torvalds 已提交
431
				goto bad;
432
			pkt_len = ntohl(*(__be32 *) (nh + off + 2));
433
			if (pkt_len <= IPV6_MAXPLEN ||
434
			    ipv6_hdr(skb)->payload_len)
435
				goto bad;
L
Linus Torvalds 已提交
436 437
			if (pkt_len > skb->len - sizeof(struct ipv6hdr))
				goto bad;
438
			if (pskb_trim_rcsum(skb,
439
					    pkt_len + sizeof(struct ipv6hdr)))
440
				goto bad;
441
			nh = skb_network_header(skb);
L
Linus Torvalds 已提交
442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
			break;
		default:
			if (optlen > len)
				goto bad;
			break;
		}
		off += optlen;
		len -= optlen;
	}
	if (len == 0)
		return 0;
bad:
	return -1;

}

/* Replicate the checks that IPv6 does on packet reception and pass the packet
 * to ip6tables, which doesn't support NAT, so things are fairly simple. */
static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
461 462 463 464
					   struct sk_buff *skb,
					   const struct net_device *in,
					   const struct net_device *out,
					   int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
465 466 467 468 469 470 471 472 473 474
{
	struct ipv6hdr *hdr;
	u32 pkt_len;

	if (skb->len < sizeof(struct ipv6hdr))
		goto inhdr_error;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto inhdr_error;

475
	hdr = ipv6_hdr(skb);
L
Linus Torvalds 已提交
476 477 478 479 480 481 482 483 484

	if (hdr->version != 6)
		goto inhdr_error;

	pkt_len = ntohs(hdr->payload_len);

	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
		if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
			goto inhdr_error;
H
Herbert Xu 已提交
485 486
		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
			goto inhdr_error;
L
Linus Torvalds 已提交
487 488
	}
	if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
489
		goto inhdr_error;
L
Linus Torvalds 已提交
490

491
	nf_bridge_put(skb->nf_bridge);
492
	if (!nf_bridge_alloc(skb))
L
Linus Torvalds 已提交
493
		return NF_DROP;
494 495
	if (!setup_pre_routing(skb))
		return NF_DROP;
L
Linus Torvalds 已提交
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511

	NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
		br_nf_pre_routing_finish_ipv6);

	return NF_STOLEN;

inhdr_error:
	return NF_DROP;
}

/* Direct IPv6 traffic to br_nf_pre_routing_ipv6.
 * Replicate the checks that IPv4 does on packet reception.
 * Set skb->dev to the bridge device (i.e. parent of the
 * receiving device) to make netfilter happy, the REDIRECT
 * target in particular.  Save the original destination IP
 * address to be able to detect DNAT afterwards. */
512
static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
513 514 515
				      const struct net_device *in,
				      const struct net_device *out,
				      int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
516 517
{
	struct iphdr *iph;
518 519 520 521
	__u32 len = nf_bridge_encap_header_len(skb);

	if (unlikely(!pskb_may_pull(skb, len)))
		goto out;
L
Linus Torvalds 已提交
522

523 524
	if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
	    IS_PPPOE_IPV6(skb)) {
L
Linus Torvalds 已提交
525 526 527 528
#ifdef CONFIG_SYSCTL
		if (!brnf_call_ip6tables)
			return NF_ACCEPT;
#endif
529
		nf_bridge_pull_encap_header_rcsum(skb);
L
Linus Torvalds 已提交
530 531 532 533 534 535 536
		return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
	}
#ifdef CONFIG_SYSCTL
	if (!brnf_call_iptables)
		return NF_ACCEPT;
#endif

537 538
	if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) &&
	    !IS_PPPOE_IP(skb))
L
Linus Torvalds 已提交
539 540
		return NF_ACCEPT;

541
	nf_bridge_pull_encap_header_rcsum(skb);
L
Linus Torvalds 已提交
542 543 544 545

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto inhdr_error;

546
	iph = ip_hdr(skb);
L
Linus Torvalds 已提交
547 548 549
	if (iph->ihl < 5 || iph->version != 4)
		goto inhdr_error;

550
	if (!pskb_may_pull(skb, 4 * iph->ihl))
L
Linus Torvalds 已提交
551 552
		goto inhdr_error;

553
	iph = ip_hdr(skb);
554
	if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
L
Linus Torvalds 已提交
555 556 557
		goto inhdr_error;

	len = ntohs(iph->tot_len);
558
	if (skb->len < len || len < 4 * iph->ihl)
L
Linus Torvalds 已提交
559 560
		goto inhdr_error;

H
Herbert Xu 已提交
561
	pskb_trim_rcsum(skb, len);
L
Linus Torvalds 已提交
562

563
	nf_bridge_put(skb->nf_bridge);
564
	if (!nf_bridge_alloc(skb))
L
Linus Torvalds 已提交
565
		return NF_DROP;
566 567
	if (!setup_pre_routing(skb))
		return NF_DROP;
L
Linus Torvalds 已提交
568 569 570 571 572 573 574 575
	store_orig_dstaddr(skb);

	NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
		br_nf_pre_routing_finish);

	return NF_STOLEN;

inhdr_error:
576
//      IP_INC_STATS_BH(IpInHdrErrors);
L
Linus Torvalds 已提交
577 578 579 580 581 582 583 584 585 586 587 588
out:
	return NF_DROP;
}


/* PF_BRIDGE/LOCAL_IN ************************************************/
/* The packet is locally destined, which requires a real
 * dst_entry, so detach the fake one.  On the way up, the
 * packet would pass through PRE_ROUTING again (which already
 * took place when the packet entered the bridge), but we
 * register an IPv4 PRE_ROUTING 'sabotage' hook that will
 * prevent this from happening. */
589
static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
590 591 592
				   const struct net_device *in,
				   const struct net_device *out,
				   int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
593 594 595 596 597 598 599 600 601 602 603 604 605 606 607
{
	if (skb->dst == (struct dst_entry *)&__fake_rtable) {
		dst_release(skb->dst);
		skb->dst = NULL;
	}

	return NF_ACCEPT;
}

/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct sk_buff *skb)
{
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
	struct net_device *in;

608
	if (skb->protocol != htons(ETH_P_ARP) && !IS_VLAN_ARP(skb)) {
L
Linus Torvalds 已提交
609 610 611 612 613 614 615 616
		in = nf_bridge->physindev;
		if (nf_bridge->mask & BRNF_PKT_TYPE) {
			skb->pkt_type = PACKET_OTHERHOST;
			nf_bridge->mask ^= BRNF_PKT_TYPE;
		}
	} else {
		in = *((struct net_device **)(skb->cb));
	}
617
	nf_bridge_push_encap_header(skb);
L
Linus Torvalds 已提交
618
	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in,
619
		       skb->dev, br_forward_finish, 1);
L
Linus Torvalds 已提交
620 621 622 623 624 625 626 627
	return 0;
}

/* This is the 'purely bridged' case.  For IP, we pass the packet to
 * netfilter with indev and outdev set to the bridge device,
 * but we are still able to filter on the 'real' indev/outdev
 * because of the physdev module. For ARP, indev and outdev are the
 * bridge ports. */
628
static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
629 630 631
				     const struct net_device *in,
				     const struct net_device *out,
				     int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
632 633
{
	struct nf_bridge_info *nf_bridge;
634
	struct net_device *parent;
L
Linus Torvalds 已提交
635 636 637 638 639
	int pf;

	if (!skb->nf_bridge)
		return NF_ACCEPT;

640 641 642 643
	parent = bridge_parent(out);
	if (!parent)
		return NF_DROP;

644 645
	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
	    IS_PPPOE_IP(skb))
L
Linus Torvalds 已提交
646 647 648 649
		pf = PF_INET;
	else
		pf = PF_INET6;

650
	nf_bridge_pull_encap_header(skb);
L
Linus Torvalds 已提交
651 652 653 654 655 656 657 658 659 660 661

	nf_bridge = skb->nf_bridge;
	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		nf_bridge->mask |= BRNF_PKT_TYPE;
	}

	/* The physdev module checks on this */
	nf_bridge->mask |= BRNF_BRIDGED;
	nf_bridge->physoutdev = skb->dev;

662 663
	NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), parent,
		br_nf_forward_finish);
L
Linus Torvalds 已提交
664 665 666 667

	return NF_STOLEN;
}

668
static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
669 670 671
				      const struct net_device *in,
				      const struct net_device *out,
				      int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
672 673 674 675 676 677 678 679
{
	struct net_device **d = (struct net_device **)(skb->cb);

#ifdef CONFIG_SYSCTL
	if (!brnf_call_arptables)
		return NF_ACCEPT;
#endif

680
	if (skb->protocol != htons(ETH_P_ARP)) {
681
		if (!IS_VLAN_ARP(skb))
L
Linus Torvalds 已提交
682
			return NF_ACCEPT;
683
		nf_bridge_pull_encap_header(skb);
L
Linus Torvalds 已提交
684 685
	}

686
	if (arp_hdr(skb)->ar_pln != 4) {
687
		if (IS_VLAN_ARP(skb))
688
			nf_bridge_push_encap_header(skb);
L
Linus Torvalds 已提交
689 690 691 692 693 694 695 696 697
		return NF_ACCEPT;
	}
	*d = (struct net_device *)in;
	NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
		(struct net_device *)out, br_nf_forward_finish);

	return NF_STOLEN;
}

698 699 700
/* PF_BRIDGE/LOCAL_OUT ***********************************************
 *
 * This function sees both locally originated IP packets and forwarded
L
Linus Torvalds 已提交
701 702 703 704 705 706 707 708
 * IP packets (in both cases the destination device is a bridge
 * device). It also sees bridged-and-DNAT'ed packets.
 *
 * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged
 * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward()
 * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
 * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
 * will be executed.
709
 */
710
static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
711 712 713
				    const struct net_device *in,
				    const struct net_device *out,
				    int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
714
{
715
	struct net_device *realindev;
L
Linus Torvalds 已提交
716 717 718 719 720 721
	struct nf_bridge_info *nf_bridge;

	if (!skb->nf_bridge)
		return NF_ACCEPT;

	nf_bridge = skb->nf_bridge;
722 723
	if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT))
		return NF_ACCEPT;
L
Linus Torvalds 已提交
724 725 726

	/* Bridged, take PF_BRIDGE/FORWARD.
	 * (see big note in front of br_nf_pre_routing_finish) */
727 728
	nf_bridge->physoutdev = skb->dev;
	realindev = nf_bridge->physindev;
L
Linus Torvalds 已提交
729

730 731 732
	if (nf_bridge->mask & BRNF_PKT_TYPE) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->mask ^= BRNF_PKT_TYPE;
L
Linus Torvalds 已提交
733
	}
734
	nf_bridge_push_encap_header(skb);
L
Linus Torvalds 已提交
735

736 737
	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
		br_forward_finish);
L
Linus Torvalds 已提交
738 739 740
	return NF_STOLEN;
}

741 742 743 744
static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP) &&
	    skb->len > skb->dev->mtu &&
H
Herbert Xu 已提交
745
	    !skb_is_gso(skb))
746 747 748 749
		return ip_fragment(skb, br_dev_queue_push_xmit);
	else
		return br_dev_queue_push_xmit(skb);
}
L
Linus Torvalds 已提交
750 751

/* PF_BRIDGE/POST_ROUTING ********************************************/
752
static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
753 754 755
				       const struct net_device *in,
				       const struct net_device *out,
				       int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
756
{
757
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
L
Linus Torvalds 已提交
758 759 760 761 762 763
	struct net_device *realoutdev = bridge_parent(skb->dev);
	int pf;

#ifdef CONFIG_NETFILTER_DEBUG
	/* Be very paranoid. This probably won't happen anymore, but let's
	 * keep the check just to be sure... */
764 765
	if (skb_mac_header(skb) < skb->head ||
	    skb_mac_header(skb) + ETH_HLEN > skb->data) {
L
Linus Torvalds 已提交
766
		printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
767
		       "bad mac.raw pointer.\n");
L
Linus Torvalds 已提交
768 769 770 771 772 773 774
		goto print_error;
	}
#endif

	if (!nf_bridge)
		return NF_ACCEPT;

775 776 777
	if (!(nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT)))
		return NF_ACCEPT;

778 779 780
	if (!realoutdev)
		return NF_DROP;

781 782
	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
	    IS_PPPOE_IP(skb))
L
Linus Torvalds 已提交
783 784 785 786 787 788
		pf = PF_INET;
	else
		pf = PF_INET6;

#ifdef CONFIG_NETFILTER_DEBUG
	if (skb->dst == NULL) {
789
		printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n");
L
Linus Torvalds 已提交
790 791 792 793 794 795 796 797 798 799 800
		goto print_error;
	}
#endif

	/* We assume any code from br_dev_queue_push_xmit onwards doesn't care
	 * about the value of skb->pkt_type. */
	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		nf_bridge->mask |= BRNF_PKT_TYPE;
	}

801
	nf_bridge_pull_encap_header(skb);
L
Linus Torvalds 已提交
802 803 804 805 806 807 808
	nf_bridge_save_header(skb);

#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
	if (nf_bridge->netoutdev)
		realoutdev = nf_bridge->netoutdev;
#endif
	NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev,
809
		br_nf_dev_queue_xmit);
L
Linus Torvalds 已提交
810 811 812 813 814 815 816

	return NF_STOLEN;

#ifdef CONFIG_NETFILTER_DEBUG
print_error:
	if (skb->dev != NULL) {
		printk("[%s]", skb->dev->name);
817 818
		if (realoutdev)
			printk("[%s]", realoutdev->name);
L
Linus Torvalds 已提交
819
	}
820
	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb),
821
	       skb->data);
822
	dump_stack();
L
Linus Torvalds 已提交
823 824 825 826 827 828 829
	return NF_ACCEPT;
#endif
}

/* IP/SABOTAGE *****************************************************/
/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
 * for the second time. */
830
static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
831 832 833
				   const struct net_device *in,
				   const struct net_device *out,
				   int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
834
{
835 836
	if (skb->nf_bridge &&
	    !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
L
Linus Torvalds 已提交
837 838 839 840 841 842 843 844 845 846 847
		return NF_STOP;
	}

	return NF_ACCEPT;
}

/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
 * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
 * ip_refrag() can return NF_STOLEN. */
static struct nf_hook_ops br_nf_ops[] = {
848 849 850 851
	{ .hook = br_nf_pre_routing,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_PRE_ROUTING,
L
Linus Torvalds 已提交
852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891
	  .priority = NF_BR_PRI_BRNF, },
	{ .hook = br_nf_local_in,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_LOCAL_IN,
	  .priority = NF_BR_PRI_BRNF, },
	{ .hook = br_nf_forward_ip,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_FORWARD,
	  .priority = NF_BR_PRI_BRNF - 1, },
	{ .hook = br_nf_forward_arp,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_FORWARD,
	  .priority = NF_BR_PRI_BRNF, },
	{ .hook = br_nf_local_out,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_LOCAL_OUT,
	  .priority = NF_BR_PRI_FIRST, },
	{ .hook = br_nf_post_routing,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_POST_ROUTING,
	  .priority = NF_BR_PRI_LAST, },
	{ .hook = ip_sabotage_in,
	  .owner = THIS_MODULE,
	  .pf = PF_INET,
	  .hooknum = NF_IP_PRE_ROUTING,
	  .priority = NF_IP_PRI_FIRST, },
	{ .hook = ip_sabotage_in,
	  .owner = THIS_MODULE,
	  .pf = PF_INET6,
	  .hooknum = NF_IP6_PRE_ROUTING,
	  .priority = NF_IP6_PRI_FIRST, },
};

#ifdef CONFIG_SYSCTL
static
892 893
int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp,
			    void __user * buffer, size_t * lenp, loff_t * ppos)
L
Linus Torvalds 已提交
894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931
{
	int ret;

	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);

	if (write && *(int *)(ctl->data))
		*(int *)(ctl->data) = 1;
	return ret;
}

static ctl_table brnf_table[] = {
	{
		.procname	= "bridge-nf-call-arptables",
		.data		= &brnf_call_arptables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
	},
	{
		.procname	= "bridge-nf-call-iptables",
		.data		= &brnf_call_iptables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
	},
	{
		.procname	= "bridge-nf-call-ip6tables",
		.data		= &brnf_call_ip6tables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
	},
	{
		.procname	= "bridge-nf-filter-vlan-tagged",
		.data		= &brnf_filter_vlan_tagged,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
932 933 934 935 936 937 938
	},
	{
		.procname	= "bridge-nf-filter-pppoe-tagged",
		.data		= &brnf_filter_pppoe_tagged,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
L
Linus Torvalds 已提交
939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963
	},
	{ .ctl_name = 0 }
};

/* "bridge" sysctl directory; its children are the entries of brnf_table. */
static ctl_table brnf_bridge_table[] = {
	{
		.ctl_name	= NET_BRIDGE,
		.procname	= "bridge",
		.mode		= 0555,
		.child		= brnf_table,
	},
	/* terminator */
	{ .ctl_name = 0 }
};

/*
 * "net" sysctl directory containing brnf_bridge_table; this is the table
 * passed to register_sysctl_table() in br_netfilter_init().
 */
static ctl_table brnf_net_table[] = {
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= brnf_bridge_table,
	},
	/* terminator */
	{ .ctl_name = 0 }
};
#endif

964
/*
 * Register the netfilter hooks in br_nf_ops and, with CONFIG_SYSCTL, the
 * bridge-nf sysctl table.  If the sysctl registration fails the hooks are
 * unregistered again.
 *
 * Returns 0 on success, the negative error from nf_register_hooks(), or
 * -ENOMEM when the sysctl table cannot be registered.
 */
int __init br_netfilter_init(void)
{
	int err = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));

	if (err < 0)
		return err;

#ifdef CONFIG_SYSCTL
	brnf_sysctl_header = register_sysctl_table(brnf_net_table);
	if (!brnf_sysctl_header) {
		printk(KERN_WARNING
		       "br_netfilter: can't register to sysctl.\n");
		nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
		return -ENOMEM;
	}
#endif

	printk(KERN_NOTICE "Bridge firewalling registered\n");
	return 0;
}

void br_netfilter_fini(void)
{
986
	nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
L
Linus Torvalds 已提交
987 988 989 990
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(brnf_sysctl_header);
#endif
}