br_netfilter.c 26.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
/*
 *	Handle firewalling
 *	Linux ethernet bridge
 *
 *	Authors:
 *	Lennert Buytenhek               <buytenh@gnu.org>
 *	Bart De Schuymer (maintainer)	<bdschuym@pandora.be>
 *
 *	Changes:
 *	Apr 29 2003: physdev module support (bdschuym)
 *	Jun 19 2003: let arptables see bridged ARP traffic (bdschuym)
 *	Oct 06 2003: filter encapsulated IP/ARP VLAN traffic on untagged bridge
 *	             (bdschuym)
 *	Sep 01 2004: add IPv6 filtering (bdschuym)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Lennert dedicates this file to Kerstin Wurdinger.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
29
#include <linux/if_arp.h>
L
Linus Torvalds 已提交
30 31 32 33 34 35 36
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_arp.h>
#include <linux/in_route.h>
37
#include <linux/inetdevice.h>
38

L
Linus Torvalds 已提交
39 40
#include <net/ip.h>
#include <net/ipv6.h>
41 42
#include <net/route.h>

L
Linus Torvalds 已提交
43 44 45 46 47 48 49 50
#include <asm/uaccess.h>
#include "br_private.h"
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

/*
 * The IPv4 destination address seen before the PRE_ROUTING hooks ran is kept
 * in the nf_bridge data area, viewed as a struct bridge_skb_cb, so that DNAT
 * can be detected afterwards by comparing it with the current header.
 *
 * skb_origaddr() has to stay a macro because store_orig_dstaddr() uses it as
 * an lvalue.  The argument is parenthesised so any pointer expression works;
 * note that store_orig_dstaddr() and dnat_took_place() evaluate it twice.
 */
#define skb_origaddr(skb)	 (((struct bridge_skb_cb *) \
				 ((skb)->nf_bridge->data))->daddr.ipv4)
#define store_orig_dstaddr(skb)	 (skb_origaddr(skb) = ip_hdr(skb)->daddr)
#define dnat_took_place(skb)	 (skb_origaddr(skb) != ip_hdr(skb)->daddr)
L
Linus Torvalds 已提交
53 54 55

#ifdef CONFIG_SYSCTL
/* Handle returned by register_sysctl_table(); kept so it can be unregistered. */
static struct ctl_table_header *brnf_sysctl_header;

/* Run-time switches exposed through sysctl; every one starts out enabled. */
static int brnf_call_iptables __read_mostly = 1;
static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_arptables __read_mostly = 1;
static int brnf_filter_vlan_tagged __read_mostly = 1;
#else
/* Without sysctl the VLAN-tagged filtering switch is a constant "on". */
#define brnf_filter_vlan_tagged 1
#endif

D
Dave Jones 已提交
64
static inline __be16 vlan_proto(const struct sk_buff *skb)
65 66 67 68 69 70
{
	return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
}

/*
 * True when @skb is an 802.1Q frame carrying IPv4 / IPv6 / ARP and filtering
 * of VLAN-tagged traffic is switched on.  The argument is parenthesised so
 * that any pointer expression can be passed; it is evaluated twice.
 */
#define IS_VLAN_IP(skb) \
	((skb)->protocol == htons(ETH_P_8021Q) && \
	 vlan_proto(skb) == htons(ETH_P_IP) && 	\
	 brnf_filter_vlan_tagged)

#define IS_VLAN_IPV6(skb) \
	((skb)->protocol == htons(ETH_P_8021Q) && \
	 vlan_proto(skb) == htons(ETH_P_IPV6) &&\
	 brnf_filter_vlan_tagged)

#define IS_VLAN_ARP(skb) \
	((skb)->protocol == htons(ETH_P_8021Q) &&	\
	 vlan_proto(skb) == htons(ETH_P_ARP) &&	\
	 brnf_filter_vlan_tagged)
L
Linus Torvalds 已提交
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102

/* We need these fake structures to make netfilter happy --
 * lots of places assume that skb->dst != NULL, which isn't
 * all that unreasonable.
 *
 * Currently, we fill in the PMTU entry because netfilter
 * refragmentation needs it, and the rt_flags entry because
 * ipt_REJECT needs it.  Future netfilter modules might
 * require us to fill additional fields. */
/* Placeholder device that __fake_rtable points at; only hard_header_len
 * (one ethernet header) is filled in. */
static struct net_device __fake_net_device = {
	.hard_header_len	= ETH_HLEN
};

/* Fake route attached to bridged packets so that skb->dst is never NULL. */
static struct rtable __fake_rtable = {
	/* ipt_REJECT looks at rt_flags. */
	.rt_flags	= 0,
	.u = {
		.dst = {
			.flags			= DST_NOXFRM,
			/* PMTU entry, needed for netfilter refragmentation. */
			.metrics		= {[RTAX_MTU - 1] = 1500},
			/* The entry is its own path. */
			.path			= &__fake_rtable.u.dst,
			.dev			= &__fake_net_device,
			.__refcnt		= ATOMIC_INIT(1),
		}
	},
};

109 110 111 112 113 114
/* Return the bridge device that @dev is a port of, or NULL when @dev has no
 * bridge port attached. */
static inline struct net_device *bridge_parent(const struct net_device *dev)
{
	struct net_bridge_port *p = rcu_dereference(dev->br_port);

	if (!p)
		return NULL;

	return p->br->dev;
}
L
Linus Torvalds 已提交
115

116 117 118 119 120 121 122 123 124 125 126
static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
{
	skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC);
	if (likely(skb->nf_bridge))
		atomic_set(&(skb->nf_bridge->use), 1);

	return skb->nf_bridge;
}

static inline void nf_bridge_save_header(struct sk_buff *skb)
{
127
	int header_size = ETH_HLEN;
128 129

	if (skb->protocol == htons(ETH_P_8021Q))
130
		header_size += VLAN_HLEN;
131 132 133 134

	memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
}

135 136 137 138 139 140 141
/*
 * When forwarding bridge frames, we save a copy of the original
 * header before processing.
 */
int nf_bridge_copy_header(struct sk_buff *skb)
{
	int err;
142
	int header_size = ETH_HLEN;
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157

	if (skb->protocol == htons(ETH_P_8021Q))
		header_size += VLAN_HLEN;

	err = skb_cow(skb, header_size);
	if (err)
		return err;

	memcpy(skb->data - header_size, skb->nf_bridge->data, header_size);

	if (skb->protocol == htons(ETH_P_8021Q))
		__skb_push(skb, VLAN_HLEN);
	return 0;
}

L
Linus Torvalds 已提交
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
/* PF_BRIDGE/PRE_ROUTING *********************************************/
/* Undo the changes made for ip6tables PREROUTING and continue the
 * bridge PRE_ROUTING hook. */
static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
{
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;

	if (nf_bridge->mask & BRNF_PKT_TYPE) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->mask ^= BRNF_PKT_TYPE;
	}
	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;

	skb->dst = (struct dst_entry *)&__fake_rtable;
	dst_hold(skb->dst);

	skb->dev = nf_bridge->physindev;
175
	if (skb->protocol == htons(ETH_P_8021Q)) {
L
Linus Torvalds 已提交
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
		skb_push(skb, VLAN_HLEN);
		skb->nh.raw -= VLAN_HLEN;
	}
	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
		       br_handle_frame_finish, 1);

	return 0;
}

/* Warn that cross-bridge DNAT needs IP forwarding; the message is printed
 * at most once per 5 * HZ jiffies. */
static void __br_dnat_complain(void)
{
	static unsigned long last_complaint;

	if (jiffies - last_complaint < 5 * HZ)
		return;

	printk(KERN_WARNING "Performing cross-bridge DNAT requires IP "
	       "forwarding to be enabled\n");
	last_complaint = jiffies;
}

/* This requires some explaining. If DNAT has taken place,
 * we will need to fix up the destination Ethernet address,
 * and this is a tricky process.
 *
 * There are two cases to consider:
 * 1. The packet was DNAT'ed to a device in the same bridge
 *    port group as it was received on. We can still bridge
 *    the packet.
 * 2. The packet was DNAT'ed to a different device, either
 *    a non-bridged device or another bridge port group.
 *    The packet will need to be routed.
 *
 * The correct way of distinguishing between these two cases is to
 * call ip_route_input() and to look at skb->dst->dev, which is
 * changed to the destination device if ip_route_input() succeeds.
 *
 * Let us first consider the case that ip_route_input() succeeds:
 *
 * If skb->dst->dev equals the logical bridge device the packet
 * came in on, we can consider this bridging. We then call
 * skb->dst->output() which will make the packet enter br_nf_local_out()
 * not much later. In that function it is assured that the iptables
 * FORWARD chain is traversed for the packet.
 *
 * Otherwise, the packet is considered to be routed and we just
 * change the destination MAC address so that the packet will
222 223 224
 * later be passed up to the IP stack to be routed. For a redirected
 * packet, ip_route_input() will give back the localhost as output device,
 * which differs from the bridge device.
L
Linus Torvalds 已提交
225 226 227
 *
 * Let us now consider the case that ip_route_input() fails:
 *
228 229
 * This can be because the destination address is martian, in which case
 * the packet will be dropped.
L
Linus Torvalds 已提交
230 231 232 233 234 235 236 237 238 239 240 241
 * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input()
 * will fail, while __ip_route_output_key() will return success. The source
 * address for __ip_route_output_key() is set to zero, so __ip_route_output_key
 * thinks we're handling a locally generated packet and won't care
 * if IP forwarding is allowed. We send a warning message to the user's
 * log telling her to put IP forwarding on.
 *
 * ip_route_input() will also fail if there is no route available.
 * In that case we just drop the packet.
 *
 * --Lennert, 20020411
 * --Bart, 20020416 (updated)
242 243
 * --Bart, 20021007 (updated)
 * --Bart, 20062711 (updated) */
L
Linus Torvalds 已提交
244 245 246 247 248 249 250 251 252
static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
{
	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		skb->nf_bridge->mask |= BRNF_PKT_TYPE;
	}
	skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;

	skb->dev = bridge_parent(skb->dev);
253 254 255
	if (!skb->dev)
		kfree_skb(skb);
	else {
256
		if (skb->protocol == htons(ETH_P_8021Q)) {
257 258 259 260
			skb_pull(skb, VLAN_HLEN);
			skb->nh.raw += VLAN_HLEN;
		}
		skb->dst->output(skb);
L
Linus Torvalds 已提交
261 262 263 264 265 266 267
	}
	return 0;
}

static int br_nf_pre_routing_finish(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
268
	struct iphdr *iph = ip_hdr(skb);
L
Linus Torvalds 已提交
269
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
270
	int err;
L
Linus Torvalds 已提交
271 272 273 274 275 276 277

	if (nf_bridge->mask & BRNF_PKT_TYPE) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->mask ^= BRNF_PKT_TYPE;
	}
	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
	if (dnat_took_place(skb)) {
278
		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
L
Linus Torvalds 已提交
279
			struct rtable *rt;
280 281 282 283 284 285 286 287 288
			struct flowi fl = {
				.nl_u = {
					.ip4_u = {
						 .daddr = iph->daddr,
						 .saddr = 0,
						 .tos = RT_TOS(iph->tos) },
				},
				.proto = 0,
			};
289 290 291 292 293 294 295 296 297 298 299
			struct in_device *in_dev = in_dev_get(dev);

			/* If err equals -EHOSTUNREACH the error is due to a
			 * martian destination or due to the fact that
			 * forwarding is disabled. For most martian packets,
			 * ip_route_output_key() will fail. It won't fail for 2 types of
			 * martian destinations: loopback destinations and destination
			 * 0.0.0.0. In both cases the packet will be dropped because the
			 * destination is the loopback device and not the bridge. */
			if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
				goto free_skb;
L
Linus Torvalds 已提交
300 301

			if (!ip_route_output_key(&rt, &fl)) {
302
				/* - Bridged-and-DNAT'ed traffic doesn't
303 304
				 *   require ip_forwarding. */
				if (((struct dst_entry *)rt)->dev == dev) {
L
Linus Torvalds 已提交
305 306 307
					skb->dst = (struct dst_entry *)rt;
					goto bridged_dnat;
				}
308 309 310 311
				/* we are sure that forwarding is disabled, so printing
				 * this message is no problem. Note that the packet could
				 * still have a martian destination address, in which case
				 * the packet could be dropped even if forwarding were enabled */
L
Linus Torvalds 已提交
312 313 314
				__br_dnat_complain();
				dst_release((struct dst_entry *)rt);
			}
315
free_skb:
L
Linus Torvalds 已提交
316 317 318 319 320 321 322 323 324 325
			kfree_skb(skb);
			return 0;
		} else {
			if (skb->dst->dev == dev) {
bridged_dnat:
				/* Tell br_nf_local_out this is a
				 * bridged frame */
				nf_bridge->mask |= BRNF_BRIDGED_DNAT;
				skb->dev = nf_bridge->physindev;
				if (skb->protocol ==
326
				    htons(ETH_P_8021Q)) {
L
Linus Torvalds 已提交
327 328 329 330 331 332 333 334 335
					skb_push(skb, VLAN_HLEN);
					skb->nh.raw -= VLAN_HLEN;
				}
				NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
					       skb, skb->dev, NULL,
					       br_nf_pre_routing_finish_bridge,
					       1);
				return 0;
			}
336
			memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, ETH_ALEN);
L
Linus Torvalds 已提交
337 338 339 340 341 342 343 344
			skb->pkt_type = PACKET_HOST;
		}
	} else {
		skb->dst = (struct dst_entry *)&__fake_rtable;
		dst_hold(skb->dst);
	}

	skb->dev = nf_bridge->physindev;
345
	if (skb->protocol == htons(ETH_P_8021Q)) {
L
Linus Torvalds 已提交
346 347 348 349 350 351 352 353 354 355
		skb_push(skb, VLAN_HLEN);
		skb->nh.raw -= VLAN_HLEN;
	}
	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
		       br_handle_frame_finish, 1);

	return 0;
}

/* Some common code for IPv4/IPv6 */
356
static struct net_device *setup_pre_routing(struct sk_buff *skb)
L
Linus Torvalds 已提交
357 358 359 360 361 362 363 364 365 366 367
{
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;

	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		nf_bridge->mask |= BRNF_PKT_TYPE;
	}

	nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
	nf_bridge->physindev = skb->dev;
	skb->dev = bridge_parent(skb->dev);
368 369

	return skb->dev;
L
Linus Torvalds 已提交
370 371 372 373 374
}

/* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */
static int check_hbh_len(struct sk_buff *skb)
{
375
	unsigned char *raw = (u8 *) (skb->nh.ipv6h + 1);
L
Linus Torvalds 已提交
376
	u32 pkt_len;
377 378
	const unsigned char *nh = skb_network_header(skb);
	int off = raw - nh;
379
	int len = (raw[1] + 1) << 3;
L
Linus Torvalds 已提交
380 381 382 383 384 385 386 387

	if ((raw + len) - skb->data > skb_headlen(skb))
		goto bad;

	off += 2;
	len -= 2;

	while (len > 0) {
388
		int optlen = nh[off + 1] + 2;
L
Linus Torvalds 已提交
389

390
		switch (nh[off]) {
L
Linus Torvalds 已提交
391 392 393 394 395 396 397 398
		case IPV6_TLV_PAD0:
			optlen = 1;
			break;

		case IPV6_TLV_PADN:
			break;

		case IPV6_TLV_JUMBO:
399
			if (nh[off + 1] != 4 || (off & 3) != 2)
L
Linus Torvalds 已提交
400
				goto bad;
401
			pkt_len = ntohl(*(__be32 *) (nh + off + 2));
402 403 404
			if (pkt_len <= IPV6_MAXPLEN ||
			    skb->nh.ipv6h->payload_len)
				goto bad;
L
Linus Torvalds 已提交
405 406
			if (pkt_len > skb->len - sizeof(struct ipv6hdr))
				goto bad;
407
			if (pskb_trim_rcsum(skb,
408
					    pkt_len + sizeof(struct ipv6hdr)))
409
				goto bad;
410
			nh = skb_network_header(skb);
L
Linus Torvalds 已提交
411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
			break;
		default:
			if (optlen > len)
				goto bad;
			break;
		}
		off += optlen;
		len -= optlen;
	}
	if (len == 0)
		return 0;
bad:
	return -1;

}

/* Replicate the checks that IPv6 does on packet reception and pass the packet
 * to ip6tables, which doesn't support NAT, so things are fairly simple. */
static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
430 431 432 433
					   struct sk_buff *skb,
					   const struct net_device *in,
					   const struct net_device *out,
					   int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453
{
	struct ipv6hdr *hdr;
	u32 pkt_len;

	if (skb->len < sizeof(struct ipv6hdr))
		goto inhdr_error;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto inhdr_error;

	hdr = skb->nh.ipv6h;

	if (hdr->version != 6)
		goto inhdr_error;

	pkt_len = ntohs(hdr->payload_len);

	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
		if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
			goto inhdr_error;
H
Herbert Xu 已提交
454 455
		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
			goto inhdr_error;
L
Linus Torvalds 已提交
456 457
	}
	if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
458
		goto inhdr_error;
L
Linus Torvalds 已提交
459

460
	nf_bridge_put(skb->nf_bridge);
461
	if (!nf_bridge_alloc(skb))
L
Linus Torvalds 已提交
462
		return NF_DROP;
463 464
	if (!setup_pre_routing(skb))
		return NF_DROP;
L
Linus Torvalds 已提交
465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481

	NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
		br_nf_pre_routing_finish_ipv6);

	return NF_STOLEN;

inhdr_error:
	return NF_DROP;
}

/* Direct IPv6 traffic to br_nf_pre_routing_ipv6.
 * Replicate the checks that IPv4 does on packet reception.
 * Set skb->dev to the bridge device (i.e. parent of the
 * receiving device) to make netfilter happy, the REDIRECT
 * target in particular.  Save the original destination IP
 * address to be able to detect DNAT afterwards. */
static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
482 483 484
				      const struct net_device *in,
				      const struct net_device *out,
				      int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
485 486 487 488 489
{
	struct iphdr *iph;
	__u32 len;
	struct sk_buff *skb = *pskb;

490
	if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb)) {
L
Linus Torvalds 已提交
491 492 493 494 495 496 497
#ifdef CONFIG_SYSCTL
		if (!brnf_call_ip6tables)
			return NF_ACCEPT;
#endif
		if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
			goto out;

498
		if (skb->protocol == htons(ETH_P_8021Q)) {
499
			skb_pull_rcsum(skb, VLAN_HLEN);
500
			skb->nh.raw += VLAN_HLEN;
L
Linus Torvalds 已提交
501 502 503 504 505 506 507 508
		}
		return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
	}
#ifdef CONFIG_SYSCTL
	if (!brnf_call_iptables)
		return NF_ACCEPT;
#endif

509
	if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb))
L
Linus Torvalds 已提交
510 511 512 513 514
		return NF_ACCEPT;

	if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
		goto out;

515
	if (skb->protocol == htons(ETH_P_8021Q)) {
516
		skb_pull_rcsum(skb, VLAN_HLEN);
517
		skb->nh.raw += VLAN_HLEN;
L
Linus Torvalds 已提交
518 519 520 521 522
	}

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto inhdr_error;

523
	iph = ip_hdr(skb);
L
Linus Torvalds 已提交
524 525 526
	if (iph->ihl < 5 || iph->version != 4)
		goto inhdr_error;

527
	if (!pskb_may_pull(skb, 4 * iph->ihl))
L
Linus Torvalds 已提交
528 529
		goto inhdr_error;

530
	iph = ip_hdr(skb);
531
	if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
L
Linus Torvalds 已提交
532 533 534
		goto inhdr_error;

	len = ntohs(iph->tot_len);
535
	if (skb->len < len || len < 4 * iph->ihl)
L
Linus Torvalds 已提交
536 537
		goto inhdr_error;

H
Herbert Xu 已提交
538
	pskb_trim_rcsum(skb, len);
L
Linus Torvalds 已提交
539

540
	nf_bridge_put(skb->nf_bridge);
541
	if (!nf_bridge_alloc(skb))
L
Linus Torvalds 已提交
542
		return NF_DROP;
543 544
	if (!setup_pre_routing(skb))
		return NF_DROP;
L
Linus Torvalds 已提交
545 546 547 548 549 550 551 552
	store_orig_dstaddr(skb);

	NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
		br_nf_pre_routing_finish);

	return NF_STOLEN;

inhdr_error:
553
//      IP_INC_STATS_BH(IpInHdrErrors);
L
Linus Torvalds 已提交
554 555 556 557 558 559 560 561 562 563 564 565 566
out:
	return NF_DROP;
}


/* PF_BRIDGE/LOCAL_IN ************************************************/
/* The packet is locally destined, which requires a real
 * dst_entry, so detach the fake one.  On the way up, the
 * packet would pass through PRE_ROUTING again (which already
 * took place when the packet entered the bridge), but we
 * register an IPv4 PRE_ROUTING 'sabotage' hook that will
 * prevent this from happening. */
static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb,
567 568 569
				   const struct net_device *in,
				   const struct net_device *out,
				   int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586
{
	struct sk_buff *skb = *pskb;

	if (skb->dst == (struct dst_entry *)&__fake_rtable) {
		dst_release(skb->dst);
		skb->dst = NULL;
	}

	return NF_ACCEPT;
}

/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct sk_buff *skb)
{
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
	struct net_device *in;

587
	if (skb->protocol != htons(ETH_P_ARP) && !IS_VLAN_ARP(skb)) {
L
Linus Torvalds 已提交
588 589 590 591 592 593 594 595
		in = nf_bridge->physindev;
		if (nf_bridge->mask & BRNF_PKT_TYPE) {
			skb->pkt_type = PACKET_OTHERHOST;
			nf_bridge->mask ^= BRNF_PKT_TYPE;
		}
	} else {
		in = *((struct net_device **)(skb->cb));
	}
596
	if (skb->protocol == htons(ETH_P_8021Q)) {
L
Linus Torvalds 已提交
597 598 599 600
		skb_push(skb, VLAN_HLEN);
		skb->nh.raw -= VLAN_HLEN;
	}
	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in,
601
		       skb->dev, br_forward_finish, 1);
L
Linus Torvalds 已提交
602 603 604 605 606 607 608 609 610
	return 0;
}

/* This is the 'purely bridged' case.  For IP, we pass the packet to
 * netfilter with indev and outdev set to the bridge device,
 * but we are still able to filter on the 'real' indev/outdev
 * because of the physdev module. For ARP, indev and outdev are the
 * bridge ports. */
static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
611 612 613
				     const struct net_device *in,
				     const struct net_device *out,
				     int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
614 615 616
{
	struct sk_buff *skb = *pskb;
	struct nf_bridge_info *nf_bridge;
617
	struct net_device *parent;
L
Linus Torvalds 已提交
618 619 620 621 622
	int pf;

	if (!skb->nf_bridge)
		return NF_ACCEPT;

623 624 625 626
	parent = bridge_parent(out);
	if (!parent)
		return NF_DROP;

627
	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
L
Linus Torvalds 已提交
628 629 630 631
		pf = PF_INET;
	else
		pf = PF_INET6;

632
	if (skb->protocol == htons(ETH_P_8021Q)) {
L
Linus Torvalds 已提交
633 634 635 636 637 638 639 640 641 642 643 644 645 646
		skb_pull(*pskb, VLAN_HLEN);
		(*pskb)->nh.raw += VLAN_HLEN;
	}

	nf_bridge = skb->nf_bridge;
	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		nf_bridge->mask |= BRNF_PKT_TYPE;
	}

	/* The physdev module checks on this */
	nf_bridge->mask |= BRNF_BRIDGED;
	nf_bridge->physoutdev = skb->dev;

647 648
	NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), parent,
		br_nf_forward_finish);
L
Linus Torvalds 已提交
649 650 651 652 653

	return NF_STOLEN;
}

static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
654 655 656
				      const struct net_device *in,
				      const struct net_device *out,
				      int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
657 658 659 660 661 662 663 664 665
{
	struct sk_buff *skb = *pskb;
	struct net_device **d = (struct net_device **)(skb->cb);

#ifdef CONFIG_SYSCTL
	if (!brnf_call_arptables)
		return NF_ACCEPT;
#endif

666
	if (skb->protocol != htons(ETH_P_ARP)) {
667
		if (!IS_VLAN_ARP(skb))
L
Linus Torvalds 已提交
668 669 670 671 672 673
			return NF_ACCEPT;
		skb_pull(*pskb, VLAN_HLEN);
		(*pskb)->nh.raw += VLAN_HLEN;
	}

	if (skb->nh.arph->ar_pln != 4) {
674
		if (IS_VLAN_ARP(skb)) {
L
Linus Torvalds 已提交
675 676 677 678 679 680 681 682 683 684 685 686
			skb_push(*pskb, VLAN_HLEN);
			(*pskb)->nh.raw -= VLAN_HLEN;
		}
		return NF_ACCEPT;
	}
	*d = (struct net_device *)in;
	NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
		(struct net_device *)out, br_nf_forward_finish);

	return NF_STOLEN;
}

687 688 689
/* PF_BRIDGE/LOCAL_OUT ***********************************************
 *
 * This function sees both locally originated IP packets and forwarded
L
Linus Torvalds 已提交
690 691 692 693 694 695 696 697
 * IP packets (in both cases the destination device is a bridge
 * device). It also sees bridged-and-DNAT'ed packets.
 *
 * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged
 * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward_ip()
 * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
 * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
 * will be executed.
698
 */
L
Linus Torvalds 已提交
699
static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
700 701 702
				    const struct net_device *in,
				    const struct net_device *out,
				    int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
703
{
704
	struct net_device *realindev;
L
Linus Torvalds 已提交
705 706 707 708 709 710 711
	struct sk_buff *skb = *pskb;
	struct nf_bridge_info *nf_bridge;

	if (!skb->nf_bridge)
		return NF_ACCEPT;

	nf_bridge = skb->nf_bridge;
712 713
	if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT))
		return NF_ACCEPT;
L
Linus Torvalds 已提交
714 715 716

	/* Bridged, take PF_BRIDGE/FORWARD.
	 * (see big note in front of br_nf_pre_routing_finish) */
717 718
	nf_bridge->physoutdev = skb->dev;
	realindev = nf_bridge->physindev;
L
Linus Torvalds 已提交
719

720 721 722
	if (nf_bridge->mask & BRNF_PKT_TYPE) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->mask ^= BRNF_PKT_TYPE;
L
Linus Torvalds 已提交
723
	}
724
	if (skb->protocol == htons(ETH_P_8021Q)) {
725 726
		skb_push(skb, VLAN_HLEN);
		skb->nh.raw -= VLAN_HLEN;
L
Linus Torvalds 已提交
727 728
	}

729 730
	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
		br_forward_finish);
L
Linus Torvalds 已提交
731 732 733
	return NF_STOLEN;
}

734 735 736 737
static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP) &&
	    skb->len > skb->dev->mtu &&
H
Herbert Xu 已提交
738
	    !skb_is_gso(skb))
739 740 741 742
		return ip_fragment(skb, br_dev_queue_push_xmit);
	else
		return br_dev_queue_push_xmit(skb);
}
L
Linus Torvalds 已提交
743 744 745

/* PF_BRIDGE/POST_ROUTING ********************************************/
static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
746 747 748
				       const struct net_device *in,
				       const struct net_device *out,
				       int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
749 750 751 752 753 754 755 756 757
{
	struct sk_buff *skb = *pskb;
	struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge;
	struct net_device *realoutdev = bridge_parent(skb->dev);
	int pf;

#ifdef CONFIG_NETFILTER_DEBUG
	/* Be very paranoid. This probably won't happen anymore, but let's
	 * keep the check just to be sure... */
758 759
	if (skb_mac_header(skb) < skb->head ||
	    skb_mac_header(skb) + ETH_HLEN > skb->data) {
L
Linus Torvalds 已提交
760
		printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
761
		       "bad mac.raw pointer.\n");
L
Linus Torvalds 已提交
762 763 764 765 766 767 768
		goto print_error;
	}
#endif

	if (!nf_bridge)
		return NF_ACCEPT;

769 770 771
	if (!realoutdev)
		return NF_DROP;

772
	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
L
Linus Torvalds 已提交
773 774 775 776 777 778
		pf = PF_INET;
	else
		pf = PF_INET6;

#ifdef CONFIG_NETFILTER_DEBUG
	if (skb->dst == NULL) {
779
		printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n");
L
Linus Torvalds 已提交
780 781 782 783 784 785 786 787 788 789 790
		goto print_error;
	}
#endif

	/* We assume any code from br_dev_queue_push_xmit onwards doesn't care
	 * about the value of skb->pkt_type. */
	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		nf_bridge->mask |= BRNF_PKT_TYPE;
	}

791
	if (skb->protocol == htons(ETH_P_8021Q)) {
L
Linus Torvalds 已提交
792 793 794 795 796 797 798 799 800 801 802
		skb_pull(skb, VLAN_HLEN);
		skb->nh.raw += VLAN_HLEN;
	}

	nf_bridge_save_header(skb);

#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
	if (nf_bridge->netoutdev)
		realoutdev = nf_bridge->netoutdev;
#endif
	NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev,
803
		br_nf_dev_queue_xmit);
L
Linus Torvalds 已提交
804 805 806 807 808 809 810

	return NF_STOLEN;

#ifdef CONFIG_NETFILTER_DEBUG
print_error:
	if (skb->dev != NULL) {
		printk("[%s]", skb->dev->name);
811 812
		if (realoutdev)
			printk("[%s]", realoutdev->name);
L
Linus Torvalds 已提交
813
	}
814
	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb),
815
	       skb->data);
816
	dump_stack();
L
Linus Torvalds 已提交
817 818 819 820 821 822 823 824
	return NF_ACCEPT;
#endif
}

/* IP/SABOTAGE *****************************************************/
/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
 * for the second time. */
static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb,
825 826 827
				   const struct net_device *in,
				   const struct net_device *out,
				   int (*okfn)(struct sk_buff *))
L
Linus Torvalds 已提交
828 829 830 831 832 833 834 835 836 837 838 839 840 841
{
	if ((*pskb)->nf_bridge &&
	    !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
		return NF_STOP;
	}

	return NF_ACCEPT;
}

/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
 * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
 * ip_refrag() can return NF_STOLEN. */
static struct nf_hook_ops br_nf_ops[] = {
842 843 844 845
	{ .hook = br_nf_pre_routing,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_PRE_ROUTING,
L
Linus Torvalds 已提交
846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885
	  .priority = NF_BR_PRI_BRNF, },
	{ .hook = br_nf_local_in,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_LOCAL_IN,
	  .priority = NF_BR_PRI_BRNF, },
	{ .hook = br_nf_forward_ip,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_FORWARD,
	  .priority = NF_BR_PRI_BRNF - 1, },
	{ .hook = br_nf_forward_arp,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_FORWARD,
	  .priority = NF_BR_PRI_BRNF, },
	{ .hook = br_nf_local_out,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_LOCAL_OUT,
	  .priority = NF_BR_PRI_FIRST, },
	{ .hook = br_nf_post_routing,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_POST_ROUTING,
	  .priority = NF_BR_PRI_LAST, },
	{ .hook = ip_sabotage_in,
	  .owner = THIS_MODULE,
	  .pf = PF_INET,
	  .hooknum = NF_IP_PRE_ROUTING,
	  .priority = NF_IP_PRI_FIRST, },
	{ .hook = ip_sabotage_in,
	  .owner = THIS_MODULE,
	  .pf = PF_INET6,
	  .hooknum = NF_IP6_PRE_ROUTING,
	  .priority = NF_IP6_PRI_FIRST, },
};

#ifdef CONFIG_SYSCTL
static
886 887
int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp,
			    void __user * buffer, size_t * lenp, loff_t * ppos)
L
Linus Torvalds 已提交
888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954
{
	int ret;

	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);

	if (write && *(int *)(ctl->data))
		*(int *)(ctl->data) = 1;
	return ret;
}

/* The four bridge-nf switches.  Each entry is an int, mode 0644, handled by
 * brnf_sysctl_call_tables(); the table is the child of the "bridge" entry. */
static ctl_table brnf_table[] = {
	{
		.ctl_name	= NET_BRIDGE_NF_CALL_ARPTABLES,
		.procname	= "bridge-nf-call-arptables",
		.data		= &brnf_call_arptables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
	},
	{
		.ctl_name	= NET_BRIDGE_NF_CALL_IPTABLES,
		.procname	= "bridge-nf-call-iptables",
		.data		= &brnf_call_iptables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
	},
	{
		.ctl_name	= NET_BRIDGE_NF_CALL_IP6TABLES,
		.procname	= "bridge-nf-call-ip6tables",
		.data		= &brnf_call_ip6tables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
	},
	{
		.ctl_name	= NET_BRIDGE_NF_FILTER_VLAN_TAGGED,
		.procname	= "bridge-nf-filter-vlan-tagged",
		.data		= &brnf_filter_vlan_tagged,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
	},
	/* Terminating entry. */
	{ .ctl_name = 0 }
};

/* "bridge" directory (mode 0555) whose child is brnf_table. */
static ctl_table brnf_bridge_table[] = {
	{
		.ctl_name	= NET_BRIDGE,
		.procname	= "bridge",
		.mode		= 0555,
		.child		= brnf_table,
	},
	/* Terminating entry. */
	{ .ctl_name = 0 }
};

/* Root table passed to register_sysctl_table(): the "net" directory
 * (mode 0555) whose child is brnf_bridge_table. */
static ctl_table brnf_net_table[] = {
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= brnf_bridge_table,
	},
	/* Terminating entry. */
	{ .ctl_name = 0 }
};
#endif

955
/* Register the netfilter hooks and, with CONFIG_SYSCTL, the sysctl table.
 * Returns 0 on success, the nf_register_hooks() error, or -ENOMEM when the
 * sysctl table cannot be registered (the hooks are unregistered again). */
int __init br_netfilter_init(void)
{
	int ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));

	if (ret < 0)
		return ret;

#ifdef CONFIG_SYSCTL
	brnf_sysctl_header = register_sysctl_table(brnf_net_table);
	if (!brnf_sysctl_header) {
		printk(KERN_WARNING
		       "br_netfilter: can't register to sysctl.\n");
		/* Roll back the hook registration done above. */
		nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
		return -ENOMEM;
	}
#endif

	printk(KERN_NOTICE "Bridge firewalling registered\n");
	return 0;
}

void br_netfilter_fini(void)
{
977
	nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
L
Linus Torvalds 已提交
978 979 980 981
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(brnf_sysctl_header);
#endif
}