/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2011 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/ip.h>
#include <linux/string.h>
#include <net/ip.h>

#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l3proto.h>
/* Template for the IPv4 "nat" table; a per-netns instance is created
 * from it in iptable_nat_net_init().  The hook mask covers all four
 * NAT attachment points.
 */
static const struct xt_table nf_nat_ipv4_table = {
	.name		= "nat",
	.valid_hooks	= (1 << NF_INET_PRE_ROUTING) |
			  (1 << NF_INET_POST_ROUTING) |
			  (1 << NF_INET_LOCAL_OUT) |
			  (1 << NF_INET_LOCAL_IN),
	.me		= THIS_MODULE,
	.af		= NFPROTO_IPV4,
};

static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
32
{
33 34 35 36 37 38 39 40 41 42 43 44 45
	/* Force range to this IP; let proto decide mapping for
	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
	 */
	struct nf_nat_range range;

	range.flags = 0;
	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
		 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);

	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
}
46

47 48 49 50 51 52 53
static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
				     const struct net_device *in,
				     const struct net_device *out,
				     struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	unsigned int ret;
54

55 56 57 58
	ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
	if (ret == NF_ACCEPT) {
		if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
			ret = alloc_null_binding(ct, hooknum);
59
	}
60
	return ret;
61 62 63
}

/* Core NAT hook shared by all four hook points: sets up a binding for
 * new connections (via the "nat" table, or a NULL binding as fallback)
 * and translates every packet belonging to a NATed connection.
 */
static unsigned int
nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
	       struct sk_buff *skb,
	       const struct net_device *in,
	       const struct net_device *out,
	       int (*okfn)(struct sk_buff *))
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	/* maniptype == SRC for postrouting. */
	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);

	/* We never see fragments: conntrack defrags on pre-routing
	 * and local-out, and nf_nat_out protects post-routing.
	 */
	NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));

	ct = nf_ct_get(skb, &ctinfo);
	/* Can't track?  It's not due to stress, or conntrack would
	 * have dropped it.  Hence it's the user's responsibilty to
	 * packet filter it out, or implement conntrack/NAT for that
	 * protocol. 8) --RR
	 */
	if (!ct)
		return NF_ACCEPT;

	/* Don't try to NAT if this packet is not conntracked */
	if (nf_ct_is_untracked(ct))
		return NF_ACCEPT;

	/* Attach the NAT extension; without it we cannot hold NAT
	 * state for this conntrack, so just accept untranslated.
	 */
	nat = nf_ct_nat_ext_add(ct);
	if (nat == NULL)
		return NF_ACCEPT;

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		/* ICMP errors carry the offending packet's header and
		 * need their embedded tuple translated separately.
		 */
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
							   ops->hooknum))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
	case IP_CT_NEW:
		/* Seen it before?  This can happen for loopback, retrans,
		 * or local packets.
		 */
		if (!nf_nat_initialized(ct, maniptype)) {
			unsigned int ret;

			ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
			if (ret != NF_ACCEPT)
				return ret;
		} else {
			pr_debug("Already setup manip %s for ct %p\n",
				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
				 ct);
			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
				goto oif_changed;
		}
		break;

	default:
		/* ESTABLISHED */
		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
			     ctinfo == IP_CT_ESTABLISHED_REPLY);
		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
			goto oif_changed;
	}

	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);

oif_changed:
	/* Output interface changed after the binding was set up; the
	 * existing mapping is no longer valid, so kill the conntrack
	 * and drop the packet.
	 */
	nf_ct_kill_acct(ct, ctinfo, skb);
	return NF_DROP;
}

static unsigned int
144
nf_nat_ipv4_in(const struct nf_hook_ops *ops,
145 146 147 148
	       struct sk_buff *skb,
	       const struct net_device *in,
	       const struct net_device *out,
	       int (*okfn)(struct sk_buff *))
149 150
{
	unsigned int ret;
151
	__be32 daddr = ip_hdr(skb)->daddr;
152

153
	ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
154
	if (ret != NF_DROP && ret != NF_STOLEN &&
E
Eric Dumazet 已提交
155 156 157
	    daddr != ip_hdr(skb)->daddr)
		skb_dst_drop(skb);

158 159 160 161
	return ret;
}

/* POST_ROUTING wrapper: after SNAT, if the source address or port was
 * rewritten (the tuples of the two directions no longer mirror each
 * other), re-run the packet through XFRM so it is matched against
 * IPsec policies with its translated addresses.
 */
static unsigned int
nf_nat_ipv4_out(const struct nf_hook_ops *ops,
		struct sk_buff *skb,
		const struct net_device *in,
		const struct net_device *out,
		int (*okfn)(struct sk_buff *))
{
#ifdef CONFIG_XFRM
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	int err;
#endif
	unsigned int ret;

	/* root is playing with raw sockets. */
	if (skb->len < sizeof(struct iphdr) ||
	    ip_hdrlen(skb) < sizeof(struct iphdr))
		return NF_ACCEPT;

	ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
#ifdef CONFIG_XFRM
	if (ret != NF_DROP && ret != NF_STOLEN &&
	    !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);

		/* Source address changed, or (for non-ICMP) the source
		 * port changed: the packet was SNATed.
		 */
		if ((ct->tuplehash[dir].tuple.src.u3.ip !=
		     ct->tuplehash[!dir].tuple.dst.u3.ip) ||
		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
		     ct->tuplehash[dir].tuple.src.u.all !=
		     ct->tuplehash[!dir].tuple.dst.u.all)) {
			err = nf_xfrm_me_harder(skb, AF_INET);
			if (err < 0)
				ret = NF_DROP_ERR(err);
		}
	}
#endif
	return ret;
}

/* LOCAL_OUT wrapper: if DNAT rewrote the destination of a locally
 * generated packet, re-route it (the original route went to the old
 * destination); if only the destination port changed, re-run XFRM
 * policy lookup instead.
 */
static unsigned int
nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
		     struct sk_buff *skb,
		     const struct net_device *in,
		     const struct net_device *out,
		     int (*okfn)(struct sk_buff *))
{
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	unsigned int ret;
	int err;

	/* root is playing with raw sockets. */
	if (skb->len < sizeof(struct iphdr) ||
	    ip_hdrlen(skb) < sizeof(struct iphdr))
		return NF_ACCEPT;

	ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
	if (ret != NF_DROP && ret != NF_STOLEN &&
	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);

		/* Destination address was rewritten: recompute the route. */
		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
		    ct->tuplehash[!dir].tuple.src.u3.ip) {
			err = ip_route_me_harder(skb, RTN_UNSPEC);
			if (err < 0)
				ret = NF_DROP_ERR(err);
		}
#ifdef CONFIG_XFRM
		/* Address unchanged but destination port rewritten
		 * (non-ICMP): re-evaluate XFRM policies.
		 */
		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
			 ct->tuplehash[dir].tuple.dst.u.all !=
			 ct->tuplehash[!dir].tuple.src.u.all) {
			err = nf_xfrm_me_harder(skb, AF_INET);
			if (err < 0)
				ret = NF_DROP_ERR(err);
		}
#endif
	}
	return ret;
}

/* The four NAT hook registrations: DNAT before filtering
 * (PRE_ROUTING/LOCAL_OUT), SNAT after filtering
 * (POST_ROUTING/LOCAL_IN).  All share nf_nat_ipv4_fn; the wrappers
 * add route/XFRM fixups where needed.
 */
static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
	/* Before packet filtering, change destination */
	{
		.hook		= nf_nat_ipv4_in,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_PRE_ROUTING,
		.priority	= NF_IP_PRI_NAT_DST,
	},
	/* After packet filtering, change source */
	{
		.hook		= nf_nat_ipv4_out,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_POST_ROUTING,
		.priority	= NF_IP_PRI_NAT_SRC,
	},
	/* Before packet filtering, change destination */
	{
		.hook		= nf_nat_ipv4_local_fn,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_OUT,
		.priority	= NF_IP_PRI_NAT_DST,
	},
	/* After packet filtering, change source */
	{
		.hook		= nf_nat_ipv4_fn,
		.owner		= THIS_MODULE,
		.pf		= NFPROTO_IPV4,
		.hooknum	= NF_INET_LOCAL_IN,
		.priority	= NF_IP_PRI_NAT_SRC,
	},
};

278
static int __net_init iptable_nat_net_init(struct net *net)
279
{
280 281 282 283 284 285 286
	struct ipt_replace *repl;

	repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
	if (repl == NULL)
		return -ENOMEM;
	net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
	kfree(repl);
287
	return PTR_ERR_OR_ZERO(net->ipv4.nat_table);
288
}
289

/* Per-netns teardown: unregister the "nat" table instance. */
static void __net_exit iptable_nat_net_exit(struct net *net)
{
	ipt_unregister_table(net, net->ipv4.nat_table);
}


/* Per-network-namespace lifecycle hooks for the "nat" table. */
static struct pernet_operations iptable_nat_net_ops = {
	.init	= iptable_nat_net_init,
	.exit	= iptable_nat_net_exit,
};


300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
static int __init iptable_nat_init(void)
{
	int err;

	err = register_pernet_subsys(&iptable_nat_net_ops);
	if (err < 0)
		goto err1;

	err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
	if (err < 0)
		goto err2;
	return 0;

err2:
	unregister_pernet_subsys(&iptable_nat_net_ops);
err1:
	return err;
317 318
}

/* Module exit: unregister in reverse order of registration. */
static void __exit iptable_nat_exit(void)
{
	nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
	unregister_pernet_subsys(&iptable_nat_net_ops);
}


module_init(iptable_nat_init);
module_exit(iptable_nat_exit);

MODULE_LICENSE("GPL");