/*
 * Stateless NAT actions
 *
 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 */

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/tc_act/tc_nat.h>
#include <net/act_api.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/netlink.h>
#include <net/tc_act/tc_nat.h>
#include <net/tcp.h>
#include <net/udp.h>


/*
 * Passed as the second argument to tcf_register_action() when the module
 * registers its action ops.  Presumably the hash-table mask for this action
 * type (15 => 16 buckets) -- TODO confirm against act_api.
 */
#define NAT_TAB_MASK	15

34 35 36 37
static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
	[TCA_NAT_PARMS]	= { .len = sizeof(struct tc_nat) },
};

38
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
H
Herbert Xu 已提交
39 40
			struct tc_action *a, int ovr, int bind)
{
41
	struct nlattr *tb[TCA_NAT_MAX + 1];
H
Herbert Xu 已提交
42
	struct tc_nat *parm;
43
	int ret = 0, err;
H
Herbert Xu 已提交
44 45
	struct tcf_nat *p;

46
	if (nla == NULL)
H
Herbert Xu 已提交
47 48
		return -EINVAL;

49
	err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy);
50 51 52
	if (err < 0)
		return err;

53
	if (tb[TCA_NAT_PARMS] == NULL)
H
Herbert Xu 已提交
54
		return -EINVAL;
55
	parm = nla_data(tb[TCA_NAT_PARMS]);
H
Herbert Xu 已提交
56

57
	if (!tcf_hash_check(parm->index, a, bind)) {
58 59
		ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
				      bind, false);
60 61
		if (ret)
			return ret;
H
Herbert Xu 已提交
62 63
		ret = ACT_P_CREATED;
	} else {
64 65
		if (bind)
			return 0;
66
		tcf_hash_release(a, bind);
67
		if (!ovr)
H
Herbert Xu 已提交
68 69
			return -EEXIST;
	}
70
	p = to_tcf_nat(a);
H
Herbert Xu 已提交
71 72 73 74 75 76 77 78 79 80 81

	spin_lock_bh(&p->tcf_lock);
	p->old_addr = parm->old_addr;
	p->new_addr = parm->new_addr;
	p->mask = parm->mask;
	p->flags = parm->flags;

	p->tcf_action = parm->action;
	spin_unlock_bh(&p->tcf_lock);

	if (ret == ACT_P_CREATED)
82
		tcf_hash_insert(a);
H
Herbert Xu 已提交
83 84 85 86

	return ret;
}

87
static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
H
Herbert Xu 已提交
88 89 90 91 92 93 94 95 96 97 98
		   struct tcf_result *res)
{
	struct tcf_nat *p = a->priv;
	struct iphdr *iph;
	__be32 old_addr;
	__be32 new_addr;
	__be32 mask;
	__be32 addr;
	int egress;
	int action;
	int ihl;
C
Changli Gao 已提交
99
	int noff;
H
Herbert Xu 已提交
100 101 102 103 104 105 106 107 108 109

	spin_lock(&p->tcf_lock);

	p->tcf_tm.lastuse = jiffies;
	old_addr = p->old_addr;
	new_addr = p->new_addr;
	mask = p->mask;
	egress = p->flags & TCA_NAT_FLAG_EGRESS;
	action = p->tcf_action;

110
	bstats_update(&p->tcf_bstats, skb);
H
Herbert Xu 已提交
111 112 113 114 115 116

	spin_unlock(&p->tcf_lock);

	if (unlikely(action == TC_ACT_SHOT))
		goto drop;

C
Changli Gao 已提交
117 118
	noff = skb_network_offset(skb);
	if (!pskb_may_pull(skb, sizeof(*iph) + noff))
H
Herbert Xu 已提交
119 120 121 122 123 124 125 126 127 128
		goto drop;

	iph = ip_hdr(skb);

	if (egress)
		addr = iph->saddr;
	else
		addr = iph->daddr;

	if (!((old_addr ^ addr) & mask)) {
129
		if (skb_try_make_writable(skb, sizeof(*iph) + noff))
H
Herbert Xu 已提交
130 131 132 133 134 135 136 137 138 139 140 141
			goto drop;

		new_addr &= mask;
		new_addr |= addr & ~mask;

		/* Rewrite IP header */
		iph = ip_hdr(skb);
		if (egress)
			iph->saddr = new_addr;
		else
			iph->daddr = new_addr;

142
		csum_replace4(&iph->check, addr, new_addr);
143 144 145
	} else if ((iph->frag_off & htons(IP_OFFSET)) ||
		   iph->protocol != IPPROTO_ICMP) {
		goto out;
H
Herbert Xu 已提交
146 147 148 149 150 151 152 153 154 155
	}

	ihl = iph->ihl * 4;

	/* It would be nice to share code with stateful NAT. */
	switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
	case IPPROTO_TCP:
	{
		struct tcphdr *tcph;

C
Changli Gao 已提交
156
		if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
157
		    skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff))
H
Herbert Xu 已提交
158 159 160
			goto drop;

		tcph = (void *)(skb_network_header(skb) + ihl);
161 162
		inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr,
					 true);
H
Herbert Xu 已提交
163 164 165 166 167 168
		break;
	}
	case IPPROTO_UDP:
	{
		struct udphdr *udph;

C
Changli Gao 已提交
169
		if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
170
		    skb_try_make_writable(skb, ihl + sizeof(*udph) + noff))
H
Herbert Xu 已提交
171 172 173 174
			goto drop;

		udph = (void *)(skb_network_header(skb) + ihl);
		if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
175
			inet_proto_csum_replace4(&udph->check, skb, addr,
176
						 new_addr, true);
H
Herbert Xu 已提交
177 178 179 180 181 182 183 184 185
			if (!udph->check)
				udph->check = CSUM_MANGLED_0;
		}
		break;
	}
	case IPPROTO_ICMP:
	{
		struct icmphdr *icmph;

C
Changli Gao 已提交
186
		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff))
H
Herbert Xu 已提交
187 188 189 190 191 192 193 194 195
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);

		if ((icmph->type != ICMP_DEST_UNREACH) &&
		    (icmph->type != ICMP_TIME_EXCEEDED) &&
		    (icmph->type != ICMP_PARAMETERPROB))
			break;

C
Changli Gao 已提交
196 197
		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) +
					noff))
198 199
			goto drop;

C
Changli Gao 已提交
200
		icmph = (void *)(skb_network_header(skb) + ihl);
H
Herbert Xu 已提交
201 202 203 204 205 206 207 208 209
		iph = (void *)(icmph + 1);
		if (egress)
			addr = iph->daddr;
		else
			addr = iph->saddr;

		if ((old_addr ^ addr) & mask)
			break;

210 211
		if (skb_try_make_writable(skb, ihl + sizeof(*icmph) +
					  sizeof(*iph) + noff))
H
Herbert Xu 已提交
212 213 214 215 216 217 218 219 220 221 222 223 224 225
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);
		iph = (void *)(icmph + 1);

		new_addr &= mask;
		new_addr |= addr & ~mask;

		/* XXX Fix up the inner checksums. */
		if (egress)
			iph->daddr = new_addr;
		else
			iph->saddr = new_addr;

226
		inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
227
					 false);
H
Herbert Xu 已提交
228 229 230 231 232 233
		break;
	}
	default:
		break;
	}

234
out:
H
Herbert Xu 已提交
235 236 237 238 239 240 241 242 243 244 245 246 247 248
	return action;

drop:
	spin_lock(&p->tcf_lock);
	p->tcf_qstats.drops++;
	spin_unlock(&p->tcf_lock);
	return TC_ACT_SHOT;
}

static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
			int bind, int ref)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tcf_nat *p = a->priv;
249 250 251 252 253 254 255 256 257 258 259
	struct tc_nat opt = {
		.old_addr = p->old_addr,
		.new_addr = p->new_addr,
		.mask     = p->mask,
		.flags    = p->flags,

		.index    = p->tcf_index,
		.action   = p->tcf_action,
		.refcnt   = p->tcf_refcnt - ref,
		.bindcnt  = p->tcf_bindcnt - bind,
	};
H
Herbert Xu 已提交
260 261
	struct tcf_t t;

262 263
	if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;
H
Herbert Xu 已提交
264 265 266
	t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
	t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
	t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
267 268
	if (nla_put(skb, TCA_NAT_TM, sizeof(t), &t))
		goto nla_put_failure;
H
Herbert Xu 已提交
269 270 271

	return skb->len;

272
nla_put_failure:
H
Herbert Xu 已提交
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
	nlmsg_trim(skb, b);
	return -1;
}

/* Operations table registered for the "nat" action kind. */
static struct tc_action_ops act_nat_ops = {
	/* Identity of the action type. */
	.kind  = "nat",
	.type  = TCA_ACT_NAT,
	.owner = THIS_MODULE,

	/* Callbacks implemented in this file. */
	.init  = tcf_nat_init,
	.act   = tcf_nat,
	.dump  = tcf_nat_dump,
};

/* Module metadata. */
MODULE_DESCRIPTION("Stateless NAT actions");
MODULE_LICENSE("GPL");

/*
 * Module entry point: register the NAT action ops together with
 * NAT_TAB_MASK.  Returns whatever tcf_register_action() returns.
 */
static int __init nat_init_module(void)
{
	return tcf_register_action(&act_nat_ops, NAT_TAB_MASK);
}

/* Module exit point: unregister the NAT action ops. */
static void __exit nat_cleanup_module(void)
{
	tcf_unregister_action(&act_nat_ops);
}

/* Hook the init and cleanup functions into module load and unload. */
module_init(nat_init_module);
module_exit(nat_cleanup_module);