xfrm4_policy.c 7.5 KB
Newer Older
1
/*
L
Linus Torvalds 已提交
2 3 4 5 6 7
 * xfrm4_policy.c
 *
 * Changes:
 *	Kazunori MIYAZAWA @USAGI
 * 	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific portion
8
 *
L
Linus Torvalds 已提交
9 10
 */

11 12
#include <linux/err.h>
#include <linux/kernel.h>
H
Herbert Xu 已提交
13
#include <linux/inetdevice.h>
14
#include <linux/if_tunnel.h>
15
#include <net/dst.h>
L
Linus Torvalds 已提交
16 17
#include <net/xfrm.h>
#include <net/ip.h>
18
#include <net/vrf.h>
L
Linus Torvalds 已提交
19 20 21

/*
 * Forward declaration: xfrm4_garbage_collect() calls through this object
 * before its initializer appears further down in the file.
 */
static struct xfrm_policy_afinfo xfrm4_policy_afinfo;

22
static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
D
David Ahern 已提交
23
					    int tos, int oif,
24 25
					    const xfrm_address_t *saddr,
					    const xfrm_address_t *daddr)
L
Linus Torvalds 已提交
26
{
27
	struct rtable *rt;
28

29 30 31
	memset(fl4, 0, sizeof(*fl4));
	fl4->daddr = daddr->a4;
	fl4->flowi4_tos = tos;
D
David Ahern 已提交
32
	fl4->flowi4_oif = oif;
33
	if (saddr)
34
		fl4->saddr = saddr->a4;
35

36 37
	fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF;

38
	rt = __ip_route_output_key(net, fl4);
39 40 41 42
	if (!IS_ERR(rt))
		return &rt->dst;

	return ERR_CAST(rt);
43 44
}

D
David Ahern 已提交
45
static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
46 47 48 49 50
					  const xfrm_address_t *saddr,
					  const xfrm_address_t *daddr)
{
	struct flowi4 fl4;

D
David Ahern 已提交
51
	return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr);
52 53
}

D
David Ahern 已提交
54
/*
 * Determine the source address to use towards @daddr via interface @oif.
 *
 * Performs a route lookup with no source address and TOS 0, then copies the
 * source address found in the flow key after the lookup into @saddr.  The
 * route reference obtained by the lookup is dropped before returning.
 *
 * Returns 0 on success, -EHOSTUNREACH if the lookup fails (the specific
 * lookup error is not propagated).
 */
static int xfrm4_get_saddr(struct net *net, int oif,
			   xfrm_address_t *saddr, xfrm_address_t *daddr)
{
	struct flowi4 key;
	struct dst_entry *route;

	route = __xfrm4_dst_lookup(net, &key, 0, oif, NULL, daddr);
	if (!IS_ERR(route)) {
		saddr->a4 = key.saddr;
		dst_release(route);
		return 0;
	}

	return -EHOSTUNREACH;
}

69
static int xfrm4_get_tos(const struct flowi *fl)
L
Linus Torvalds 已提交
70
{
71
	return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */
72
}
L
Linus Torvalds 已提交
73

74 75 76 77 78 79
/*
 * init_path hook of the IPv4 policy afinfo.  This implementation does no
 * work: none of the arguments is used and it always returns 0.
 */
static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
			   int nfheader_len)
{
	return 0;
}

H
Herbert Xu 已提交
80
static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
81
			  const struct flowi *fl)
82 83
{
	struct rtable *rt = (struct rtable *)xdst->route;
84
	const struct flowi4 *fl4 = &fl->u.ip4;
L
Linus Torvalds 已提交
85

86
	xdst->u.rt.rt_iif = fl4->flowi4_iif;
L
Linus Torvalds 已提交
87

88 89
	xdst->u.dst.dev = dev;
	dev_hold(dev);
90

91 92
	/* Sheit... I remember I did this right. Apparently,
	 * it was magically lost, so this code needs audit */
93
	xdst->u.rt.rt_is_input = rt->rt_is_input;
94 95 96 97
	xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
					      RTCF_LOCAL);
	xdst->u.rt.rt_type = rt->rt_type;
	xdst->u.rt.rt_gateway = rt->rt_gateway;
J
Julian Anastasov 已提交
98
	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
99
	xdst->u.rt.rt_pmtu = rt->rt_pmtu;
D
David Ahern 已提交
100
	xdst->u.rt.rt_table_id = rt->rt_table_id;
101
	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
L
Linus Torvalds 已提交
102 103 104 105 106

	return 0;
}

static void
107
_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
L
Linus Torvalds 已提交
108
{
109
	const struct iphdr *iph = ip_hdr(skb);
110
	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
111
	struct flowi4 *fl4 = &fl->u.ip4;
112 113
	int oif = 0;

114 115 116 117
	if (skb_dst(skb)) {
		oif = vrf_master_ifindex(skb_dst(skb)->dev) ?
			: skb_dst(skb)->dev->ifindex;
	}
L
Linus Torvalds 已提交
118

119 120
	memset(fl4, 0, sizeof(struct flowi4));
	fl4->flowi4_mark = skb->mark;
121
	fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
P
Peter Kosyh 已提交
122

123
	if (!ip_is_fragment(iph)) {
L
Linus Torvalds 已提交
124 125
		switch (iph->protocol) {
		case IPPROTO_UDP:
126
		case IPPROTO_UDPLITE:
L
Linus Torvalds 已提交
127 128
		case IPPROTO_TCP:
		case IPPROTO_SCTP:
129
		case IPPROTO_DCCP:
130 131
			if (xprth + 4 < skb->data ||
			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
A
Al Viro 已提交
132
				__be16 *ports = (__be16 *)xprth;
L
Linus Torvalds 已提交
133

134 135
				fl4->fl4_sport = ports[!!reverse];
				fl4->fl4_dport = ports[!reverse];
L
Linus Torvalds 已提交
136 137 138 139 140 141 142
			}
			break;

		case IPPROTO_ICMP:
			if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
				u8 *icmp = xprth;

143 144
				fl4->fl4_icmp_type = icmp[0];
				fl4->fl4_icmp_code = icmp[1];
L
Linus Torvalds 已提交
145 146 147 148 149
			}
			break;

		case IPPROTO_ESP:
			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
A
Al Viro 已提交
150
				__be32 *ehdr = (__be32 *)xprth;
L
Linus Torvalds 已提交
151

152
				fl4->fl4_ipsec_spi = ehdr[0];
L
Linus Torvalds 已提交
153 154 155 156 157
			}
			break;

		case IPPROTO_AH:
			if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
D
Daniel Baluta 已提交
158
				__be32 *ah_hdr = (__be32 *)xprth;
L
Linus Torvalds 已提交
159

160
				fl4->fl4_ipsec_spi = ah_hdr[1];
L
Linus Torvalds 已提交
161 162 163 164 165
			}
			break;

		case IPPROTO_COMP:
			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
A
Al Viro 已提交
166
				__be16 *ipcomp_hdr = (__be16 *)xprth;
L
Linus Torvalds 已提交
167

168
				fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
L
Linus Torvalds 已提交
169 170
			}
			break;
171 172 173 174 175 176 177 178 179

		case IPPROTO_GRE:
			if (pskb_may_pull(skb, xprth + 12 - skb->data)) {
				__be16 *greflags = (__be16 *)xprth;
				__be32 *gre_hdr = (__be32 *)xprth;

				if (greflags[0] & GRE_KEY) {
					if (greflags[0] & GRE_CSUM)
						gre_hdr++;
180
					fl4->fl4_gre_key = gre_hdr[1];
181 182 183 184
				}
			}
			break;

L
Linus Torvalds 已提交
185
		default:
186
			fl4->fl4_ipsec_spi = 0;
L
Linus Torvalds 已提交
187
			break;
188
		}
L
Linus Torvalds 已提交
189
	}
190 191 192 193
	fl4->flowi4_proto = iph->protocol;
	fl4->daddr = reverse ? iph->saddr : iph->daddr;
	fl4->saddr = reverse ? iph->daddr : iph->saddr;
	fl4->flowi4_tos = iph->tos;
L
Linus Torvalds 已提交
194 195
}

196
static inline int xfrm4_garbage_collect(struct dst_ops *ops)
L
Linus Torvalds 已提交
197
{
198 199 200
	struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);

	xfrm4_policy_afinfo.garbage_collect(net);
201
	return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
L
Linus Torvalds 已提交
202 203
}

204 205
/*
 * update_pmtu hook: @dst is treated as an xfrm_dst and the update is
 * forwarded unchanged to the update_pmtu op of its underlying route.
 */
static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu)
{
	struct dst_entry *underlying = ((struct xfrm_dst *)dst)->route;

	underlying->ops->update_pmtu(underlying, sk, skb, mtu);
}

213 214
static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
			   struct sk_buff *skb)
215 216 217 218
{
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	struct dst_entry *path = xdst->route;

219
	path->ops->redirect(path, sk, skb);
220 221
}

H
Herbert Xu 已提交
222 223 224 225
/*
 * destroy hook: release the entry's metrics with the generic helper first,
 * then run the common xfrm_dst teardown on the same entry.
 */
static void xfrm4_dst_destroy(struct dst_entry *dst)
{
	dst_destroy_metrics_generic(dst);

	xfrm_dst_destroy((struct xfrm_dst *)dst);
}

/*
 * ifdown hook: only acts when @unregister is non-zero, in which case the
 * work is delegated to xfrm_dst_ifdown(); otherwise it does nothing.
 */
static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			     int unregister)
{
	if (unregister)
		xfrm_dst_ifdown(dst, dev);
}

L
Linus Torvalds 已提交
240 241 242 243
static struct dst_ops xfrm4_dst_ops = {
	.family =		AF_INET,
	.gc =			xfrm4_garbage_collect,
	.update_pmtu =		xfrm4_update_pmtu,
244
	.redirect =		xfrm4_redirect,
245
	.cow_metrics =		dst_cow_metrics_generic,
H
Herbert Xu 已提交
246 247
	.destroy =		xfrm4_dst_destroy,
	.ifdown =		xfrm4_dst_ifdown,
248
	.local_out =		__ip_local_out,
249
	.gc_thresh =		INT_MAX,
L
Linus Torvalds 已提交
250 251 252 253 254 255
};

static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
	.family = 		AF_INET,
	.dst_ops =		&xfrm4_dst_ops,
	.dst_lookup =		xfrm4_dst_lookup,
256
	.get_saddr =		xfrm4_get_saddr,
L
Linus Torvalds 已提交
257
	.decode_session =	_decode_session4,
258
	.get_tos =		xfrm4_get_tos,
259
	.init_path =		xfrm4_init_path,
260
	.fill_dst =		xfrm4_fill_dst,
261
	.blackhole_route =	ipv4_blackhole_route,
L
Linus Torvalds 已提交
262 263
};

264
#ifdef CONFIG_SYSCTL
265 266 267
static struct ctl_table xfrm4_policy_table[] = {
	{
		.procname       = "xfrm4_gc_thresh",
268
		.data           = &init_net.xfrm.xfrm4_dst_ops.gc_thresh,
269 270 271 272 273 274 275
		.maxlen         = sizeof(int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec,
	},
	{ }
};

276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307
/*
 * Per-namespace init: register the xfrm4 sysctl table under "net/ipv4".
 *
 * init_net uses the static xfrm4_policy_table directly.  Any other namespace
 * gets a kmemdup()'d copy whose first entry points at that namespace's own
 * gc_thresh.  The resulting header is stored in net->ipv4.xfrm4_hdr.
 *
 * Returns 0 on success, -ENOMEM if either the copy or the registration
 * fails; a copy allocated here is freed again on registration failure.
 */
static int __net_init xfrm4_net_init(struct net *net)
{
	struct ctl_table *tbl = xfrm4_policy_table;
	struct ctl_table_header *hdr;
	int is_init_net = net_eq(net, &init_net);

	if (!is_init_net) {
		tbl = kmemdup(tbl, sizeof(xfrm4_policy_table), GFP_KERNEL);
		if (tbl == NULL)
			return -ENOMEM;

		tbl[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh;
	}

	hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (hdr == NULL) {
		/* Only the duplicated table is ours to free. */
		if (!is_init_net)
			kfree(tbl);
		return -ENOMEM;
	}

	net->ipv4.xfrm4_hdr = hdr;
	return 0;
}

/*
 * Per-namespace exit: unregister the sysctl header stored by
 * xfrm4_net_init(), if any, and free the table when it was a per-namespace
 * copy (i.e. for every namespace other than init_net).
 */
static void __net_exit xfrm4_net_exit(struct net *net)
{
	struct ctl_table_header *hdr = net->ipv4.xfrm4_hdr;
	struct ctl_table *tbl;

	if (hdr == NULL)
		return;

	/*
	 * Fetch the table pointer before unregistering; presumably the header
	 * must not be dereferenced afterwards.
	 */
	tbl = hdr->ctl_table_arg;
	unregister_net_sysctl_table(hdr);

	if (!net_eq(net, &init_net))
		kfree(tbl);
}

/*
 * Per-network-namespace hooks for the xfrm4 sysctl table; registered from
 * xfrm4_init() when CONFIG_SYSCTL is enabled.
 */
static struct pernet_operations __net_initdata xfrm4_net_ops = {
	.init	= xfrm4_net_init,
	.exit	= xfrm4_net_exit,
};
321
#endif
322

L
Linus Torvalds 已提交
323 324 325 326 327
/*
 * Register the IPv4 policy afinfo via xfrm_policy_register_afinfo().
 * The return value of the registration call, if any, is not checked.
 */
static void __init xfrm4_policy_init(void)
{
	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
}

328
/*
 * Boot-time initialisation of the IPv4 xfrm code.
 *
 * Steps, in order: initialise the entry counters of xfrm4_dst_ops, run the
 * state, policy and protocol init routines, and (with CONFIG_SYSCTL)
 * register the per-namespace sysctl hooks.  Return values of the called
 * routines are not checked here.
 */
void __init xfrm4_init(void)
{
	/* The dst_ops counters are set up before anything else is initialised. */
	dst_entries_init(&xfrm4_dst_ops);

	xfrm4_state_init();
	xfrm4_policy_init();
	xfrm4_protocol_init();

#ifdef CONFIG_SYSCTL
	/* Exposes xfrm4_gc_thresh in every network namespace. */
	register_pernet_subsys(&xfrm4_net_ops);
#endif
}