ip6_tunnel.c 43.0 KB
Newer Older
L
Linus Torvalds 已提交
1
/*
2
 *	IPv6 tunneling device
L
Linus Torvalds 已提交
3 4 5
 *	Linux INET6 implementation
 *
 *	Authors:
6
 *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
7
 *	Yasuyuki Kozakai	<kozakai@linux-ipv6.org>
L
Linus Torvalds 已提交
8 9
 *
 *      Based on:
10
 *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
L
Linus Torvalds 已提交
11 12 13 14 15 16 17 18 19 20
 *
 *      RFC 2473
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 */

21 22
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

L
Linus Torvalds 已提交
23
#include <linux/module.h>
24
#include <linux/capability.h>
L
Linus Torvalds 已提交
25 26 27
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sockios.h>
28
#include <linux/icmp.h>
L
Linus Torvalds 已提交
29 30 31 32 33 34 35 36 37 38 39 40 41
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/if_tunnel.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/route.h>
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
42
#include <linux/slab.h>
E
Eric Dumazet 已提交
43
#include <linux/hash.h>
L
Linus Torvalds 已提交
44 45

#include <asm/uaccess.h>
A
Arun Sharma 已提交
46
#include <linux/atomic.h>
L
Linus Torvalds 已提交
47

48
#include <net/icmp.h>
L
Linus Torvalds 已提交
49 50 51 52 53 54 55 56
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/ip6_tunnel.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
57 58
#include <net/net_namespace.h>
#include <net/netns/generic.h>
L
Linus Torvalds 已提交
59 60

MODULE_AUTHOR("Ville Nuorvala");
61
MODULE_DESCRIPTION("IPv6 tunneling device");
L
Linus Torvalds 已提交
62
MODULE_LICENSE("GPL");
S
stephen hemminger 已提交
63
MODULE_ALIAS_NETDEV("ip6tnl0");
L
Linus Torvalds 已提交
64 65

#ifdef IP6_TNL_DEBUG
66
#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__)
L
Linus Torvalds 已提交
67 68 69 70 71
#else
#define IP6_TNL_TRACE(x...) do {;} while(0)
#endif

#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
72
#define IPV6_TCLASS_SHIFT 20
L
Linus Torvalds 已提交
73

E
Eric Dumazet 已提交
74 75
#define HASH_SIZE_SHIFT  5
#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
L
Linus Torvalds 已提交
76

E
Eric Dumazet 已提交
77 78 79 80 81 82
/*
 * Combine the hashes of the two tunnel end-point addresses and reduce the
 * result to HASH_SIZE_SHIFT bits.
 */
static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
	const u32 h1 = ipv6_addr_hash(addr1);
	const u32 h2 = ipv6_addr_hash(addr2);

	return hash_32(h1 ^ h2, HASH_SIZE_SHIFT);
}
L
Linus Torvalds 已提交
83

E
Eric Dumazet 已提交
84
static int ip6_tnl_dev_init(struct net_device *dev);
85
static void ip6_tnl_dev_setup(struct net_device *dev);
86
static struct rtnl_link_ops ip6_link_ops __read_mostly;
L
Linus Torvalds 已提交
87

88
static int ip6_tnl_net_id __read_mostly;
89
struct ip6_tnl_net {
90 91
	/* the IPv6 tunnel fallback device */
	struct net_device *fb_tnl_dev;
92
	/* lists for storing tunnels in use */
E
Eric Dumazet 已提交
93 94 95
	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
	struct ip6_tnl __rcu *tnls_wc[1];
	struct ip6_tnl __rcu **tnls[2];
96 97
};

E
Eric Dumazet 已提交
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
	struct pcpu_tstats sum = { 0 };
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);

		sum.rx_packets += tstats->rx_packets;
		sum.rx_bytes   += tstats->rx_bytes;
		sum.tx_packets += tstats->tx_packets;
		sum.tx_bytes   += tstats->tx_bytes;
	}
	dev->stats.rx_packets = sum.rx_packets;
	dev->stats.rx_bytes   = sum.rx_bytes;
	dev->stats.tx_packets = sum.tx_packets;
	dev->stats.tx_bytes   = sum.tx_bytes;
	return &dev->stats;
}

118
/*
E
Eric Dumazet 已提交
119
 * Locking : hash tables are protected by RCU and RTNL
120
 */
L
Linus Torvalds 已提交
121

X
xeb@mail.ru 已提交
122
struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
L
Linus Torvalds 已提交
123 124 125
{
	struct dst_entry *dst = t->dst_cache;

126
	if (dst && dst->obsolete &&
L
Linus Torvalds 已提交
127 128 129 130 131 132 133 134
	    dst->ops->check(dst, t->dst_cookie) == NULL) {
		t->dst_cache = NULL;
		dst_release(dst);
		return NULL;
	}

	return dst;
}
X
xeb@mail.ru 已提交
135
EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
L
Linus Torvalds 已提交
136

X
xeb@mail.ru 已提交
137
void ip6_tnl_dst_reset(struct ip6_tnl *t)
L
Linus Torvalds 已提交
138 139 140 141
{
	dst_release(t->dst_cache);
	t->dst_cache = NULL;
}
X
xeb@mail.ru 已提交
142
EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
L
Linus Torvalds 已提交
143

X
xeb@mail.ru 已提交
144
void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
L
Linus Torvalds 已提交
145 146 147 148 149 150
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
	dst_release(t->dst_cache);
	t->dst_cache = dst;
}
X
xeb@mail.ru 已提交
151
EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
L
Linus Torvalds 已提交
152 153

/**
154
 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
155 156
 *   @remote: the address of the tunnel exit-point
 *   @local: the address of the tunnel entry-point
L
Linus Torvalds 已提交
157
 *
158
 * Return:
L
Linus Torvalds 已提交
159
 *   tunnel matching given end-points if found,
160
 *   else fallback tunnel if its device is up,
L
Linus Torvalds 已提交
161 162 163
 *   else %NULL
 **/

164 165 166
#define for_each_ip6_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

L
Linus Torvalds 已提交
167
static struct ip6_tnl *
168
ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
L
Linus Torvalds 已提交
169
{
E
Eric Dumazet 已提交
170
	unsigned int hash = HASH(remote, local);
L
Linus Torvalds 已提交
171
	struct ip6_tnl *t;
172
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
L
Linus Torvalds 已提交
173

E
Eric Dumazet 已提交
174
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
L
Linus Torvalds 已提交
175 176 177 178 179
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}
180 181
	t = rcu_dereference(ip6n->tnls_wc[0]);
	if (t && (t->dev->flags & IFF_UP))
L
Linus Torvalds 已提交
182 183 184 185 186 187
		return t;

	return NULL;
}

/**
188
 * ip6_tnl_bucket - get head of list matching given tunnel parameters
189
 *   @p: parameters containing tunnel end-points
L
Linus Torvalds 已提交
190 191
 *
 * Description:
192
 *   ip6_tnl_bucket() returns the head of the list matching the
L
Linus Torvalds 已提交
193 194
 *   &struct in6_addr entries laddr and raddr in @p.
 *
195
 * Return: head of IPv6 tunnel list
L
Linus Torvalds 已提交
196 197
 **/

E
Eric Dumazet 已提交
198
static struct ip6_tnl __rcu **
X
xeb@mail.ru 已提交
199
ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
L
Linus Torvalds 已提交
200
{
201 202
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
203
	unsigned int h = 0;
L
Linus Torvalds 已提交
204 205 206 207
	int prio = 0;

	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
		prio = 1;
E
Eric Dumazet 已提交
208
		h = HASH(remote, local);
L
Linus Torvalds 已提交
209
	}
210
	return &ip6n->tnls[prio][h];
L
Linus Torvalds 已提交
211 212 213
}

/**
214
 * ip6_tnl_link - add tunnel to hash table
L
Linus Torvalds 已提交
215 216 217 218
 *   @t: tunnel to be added
 **/

static void
219
ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
L
Linus Torvalds 已提交
220
{
E
Eric Dumazet 已提交
221
	struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
L
Linus Torvalds 已提交
222

223 224
	rcu_assign_pointer(t->next , rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
L
Linus Torvalds 已提交
225 226 227
}

/**
228
 * ip6_tnl_unlink - remove tunnel from hash table
L
Linus Torvalds 已提交
229 230 231 232
 *   @t: tunnel to be removed
 **/

static void
233
ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
L
Linus Torvalds 已提交
234
{
E
Eric Dumazet 已提交
235 236 237 238 239 240 241
	struct ip6_tnl __rcu **tp;
	struct ip6_tnl *iter;

	for (tp = ip6_tnl_bucket(ip6n, &t->parms);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
242
			rcu_assign_pointer(*tp, t->next);
L
Linus Torvalds 已提交
243 244 245 246 247
			break;
		}
	}
}

E
Eric Dumazet 已提交
248 249 250 251 252 253
static void ip6_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}

254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
/*
 * Second stage of tunnel creation: initialise @dev, register it, copy the
 * final device name into the tunnel parameters, take a device reference
 * and insert the tunnel into the hash table.
 *
 * Returns 0 on success, or the negative error returned by
 * ip6_tnl_dev_init() or register_netdevice().  Nothing is freed here on
 * failure; that is left to the caller.
 */
static int ip6_tnl_create2(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	int err;

	err = ip6_tnl_dev_init(dev);
	if (err < 0)
		goto out;

	/*
	 * The link ops must be attached before registration: the netlink
	 * message announcing the new device is built while registering,
	 * and it can only describe the link kind if rtnl_link_ops is
	 * already set at that point.
	 */
	dev->rtnl_link_ops = &ip6_link_ops;

	err = register_netdevice(dev);
	if (err < 0)
		goto out;

	/* The device name may have been expanded from a "%d" template */
	strcpy(t->parms.name, dev->name);

	dev_hold(dev);
	ip6_tnl_link(ip6n, t);
	return 0;

out:
	return err;
}

L
Linus Torvalds 已提交
281
/**
282
 * ip6_tnl_create - create a new tunnel
L
Linus Torvalds 已提交
283 284 285 286 287
 *   @p: tunnel parameters
 *   @pt: pointer to new tunnel
 *
 * Description:
 *   Create tunnel matching given parameters.
288 289
 *
 * Return:
290
 *   created tunnel or NULL
L
Linus Torvalds 已提交
291 292
 **/

X
xeb@mail.ru 已提交
293
static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
L
Linus Torvalds 已提交
294 295 296 297 298 299
{
	struct net_device *dev;
	struct ip6_tnl *t;
	char name[IFNAMSIZ];
	int err;

300
	if (p->name[0])
L
Linus Torvalds 已提交
301
		strlcpy(name, p->name, IFNAMSIZ);
302 303 304
	else
		sprintf(name, "ip6tnl%%d");

305
	dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
L
Linus Torvalds 已提交
306
	if (dev == NULL)
307
		goto failed;
L
Linus Torvalds 已提交
308

309 310
	dev_net_set(dev, net);

311
	t = netdev_priv(dev);
L
Linus Torvalds 已提交
312
	t->parms = *p;
313
	err = ip6_tnl_create2(dev);
E
Eric Dumazet 已提交
314 315
	if (err < 0)
		goto failed_free;
L
Linus Torvalds 已提交
316

317
	return t;
318 319

failed_free:
E
Eric Dumazet 已提交
320
	ip6_dev_free(dev);
321 322
failed:
	return NULL;
L
Linus Torvalds 已提交
323 324 325
}

/**
326
 * ip6_tnl_locate - find or create tunnel matching given parameters
327
 *   @p: tunnel parameters
L
Linus Torvalds 已提交
328 329 330
 *   @create: != 0 if allowed to create new tunnel if no match found
 *
 * Description:
331
 *   ip6_tnl_locate() first tries to locate an existing tunnel
L
Linus Torvalds 已提交
332 333 334 335
 *   based on @parms. If this is unsuccessful, but @create is set a new
 *   tunnel device is created and registered for use.
 *
 * Return:
336
 *   matching tunnel or NULL
L
Linus Torvalds 已提交
337 338
 **/

339
static struct ip6_tnl *ip6_tnl_locate(struct net *net,
X
xeb@mail.ru 已提交
340
		struct __ip6_tnl_parm *p, int create)
L
Linus Torvalds 已提交
341
{
342 343
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
E
Eric Dumazet 已提交
344
	struct ip6_tnl __rcu **tp;
L
Linus Torvalds 已提交
345
	struct ip6_tnl *t;
346
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
L
Linus Torvalds 已提交
347

E
Eric Dumazet 已提交
348 349 350
	for (tp = ip6_tnl_bucket(ip6n, p);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
L
Linus Torvalds 已提交
351
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
352 353
		    ipv6_addr_equal(remote, &t->parms.raddr))
			return t;
L
Linus Torvalds 已提交
354 355
	}
	if (!create)
356
		return NULL;
357
	return ip6_tnl_create(net, p);
L
Linus Torvalds 已提交
358 359 360
}

/**
361
 * ip6_tnl_dev_uninit - tunnel device uninitializer
L
Linus Torvalds 已提交
362
 *   @dev: the device to be destroyed
363
 *
L
Linus Torvalds 已提交
364
 * Description:
365
 *   ip6_tnl_dev_uninit() removes tunnel from its list
L
Linus Torvalds 已提交
366 367 368
 **/

static void
369
ip6_tnl_dev_uninit(struct net_device *dev)
L
Linus Torvalds 已提交
370
{
371
	struct ip6_tnl *t = netdev_priv(dev);
372 373
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
L
Linus Torvalds 已提交
374

E
Eric Dumazet 已提交
375
	if (dev == ip6n->fb_tnl_dev)
376
		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
E
Eric Dumazet 已提交
377
	else
378
		ip6_tnl_unlink(ip6n, t);
L
Linus Torvalds 已提交
379 380 381 382 383 384 385 386
	ip6_tnl_dst_reset(t);
	dev_put(dev);
}

/**
 * parse_tvl_tnl_enc_lim - handle encapsulation limit option
 *   @skb: received socket buffer
 *
387 388
 * Return:
 *   0 if none was found,
L
Linus Torvalds 已提交
389 390 391
 *   else index to encapsulation limit
 **/

X
xeb@mail.ru 已提交
392
__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
L
Linus Torvalds 已提交
393
{
394
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
L
Linus Torvalds 已提交
395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441
	__u8 nexthdr = ipv6h->nexthdr;
	__u16 off = sizeof (*ipv6h);

	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
		__u16 optlen = 0;
		struct ipv6_opt_hdr *hdr;
		if (raw + off + sizeof (*hdr) > skb->data &&
		    !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
			break;

		hdr = (struct ipv6_opt_hdr *) (raw + off);
		if (nexthdr == NEXTHDR_FRAGMENT) {
			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
			if (frag_hdr->frag_off)
				break;
			optlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH) {
			optlen = (hdr->hdrlen + 2) << 2;
		} else {
			optlen = ipv6_optlen(hdr);
		}
		if (nexthdr == NEXTHDR_DEST) {
			__u16 i = off + 2;
			while (1) {
				struct ipv6_tlv_tnl_enc_lim *tel;

				/* No more room for encapsulation limit */
				if (i + sizeof (*tel) > off + optlen)
					break;

				tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
				/* return index of option if found and valid */
				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
				    tel->length == 1)
					return i;
				/* else jump to next option */
				if (tel->type)
					i += tel->length + 2;
				else
					i++;
			}
		}
		nexthdr = hdr->nexthdr;
		off += optlen;
	}
	return 0;
}
X
xeb@mail.ru 已提交
442
EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
L
Linus Torvalds 已提交
443 444

/**
445
 * ip6_tnl_err - tunnel error handler
L
Linus Torvalds 已提交
446 447
 *
 * Description:
448
 *   ip6_tnl_err() should handle errors in the tunnel according
L
Linus Torvalds 已提交
449 450 451
 *   to the specifications in RFC 2473.
 **/

H
Herbert Xu 已提交
452
static int
453
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
454
	    u8 *type, u8 *code, int *msg, __u32 *info, int offset)
L
Linus Torvalds 已提交
455
{
456
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
L
Linus Torvalds 已提交
457 458
	struct ip6_tnl *t;
	int rel_msg = 0;
459 460
	u8 rel_type = ICMPV6_DEST_UNREACH;
	u8 rel_code = ICMPV6_ADDR_UNREACH;
L
Linus Torvalds 已提交
461 462
	__u32 rel_info = 0;
	__u16 len;
H
Herbert Xu 已提交
463
	int err = -ENOENT;
L
Linus Torvalds 已提交
464

465 466
	/* If the packet doesn't contain the original IPv6 header we are
	   in trouble since we might need the source address for further
L
Linus Torvalds 已提交
467 468
	   processing of the error. */

469
	rcu_read_lock();
470
	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
471
					&ipv6h->saddr)) == NULL)
L
Linus Torvalds 已提交
472 473
		goto out;

474 475 476
	if (t->parms.proto != ipproto && t->parms.proto != 0)
		goto out;

H
Herbert Xu 已提交
477 478
	err = 0;

479
	switch (*type) {
L
Linus Torvalds 已提交
480 481 482 483
		__u32 teli;
		struct ipv6_tlv_tnl_enc_lim *tel;
		__u32 mtu;
	case ICMPV6_DEST_UNREACH:
484 485
		net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
				     t->parms.name);
L
Linus Torvalds 已提交
486 487 488
		rel_msg = 1;
		break;
	case ICMPV6_TIME_EXCEED:
489
		if ((*code) == ICMPV6_EXC_HOPLIMIT) {
490 491
			net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
					     t->parms.name);
L
Linus Torvalds 已提交
492 493 494 495
			rel_msg = 1;
		}
		break;
	case ICMPV6_PARAMPROB:
496
		teli = 0;
497
		if ((*code) == ICMPV6_HDR_FIELD)
X
xeb@mail.ru 已提交
498
			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
L
Linus Torvalds 已提交
499

A
Al Viro 已提交
500
		if (teli && teli == *info - 2) {
L
Linus Torvalds 已提交
501 502
			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
			if (tel->encap_limit == 0) {
503 504
				net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
						     t->parms.name);
L
Linus Torvalds 已提交
505 506
				rel_msg = 1;
			}
507 508 509
		} else {
			net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
					     t->parms.name);
L
Linus Torvalds 已提交
510 511 512
		}
		break;
	case ICMPV6_PKT_TOOBIG:
A
Al Viro 已提交
513
		mtu = *info - offset;
L
Linus Torvalds 已提交
514 515 516 517
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
		t->dev->mtu = mtu;

A
Al Viro 已提交
518
		if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
L
Linus Torvalds 已提交
519 520 521 522 523 524 525
			rel_type = ICMPV6_PKT_TOOBIG;
			rel_code = 0;
			rel_info = mtu;
			rel_msg = 1;
		}
		break;
	}
526 527 528 529 530 531 532

	*type = rel_type;
	*code = rel_code;
	*info = rel_info;
	*msg = rel_msg;

out:
533
	rcu_read_unlock();
534 535 536
	return err;
}

537 538
/*
 * Error handler for IPv4-in-IPv6 tunnels.
 *
 * Lets ip6_tnl_err() decide whether the ICMPv6 error has to be relayed,
 * translates the relay type/code to ICMPv4, and sends an ICMPv4 error for
 * the encapsulated IPv4 packet found at @offset in @skb.  The clone used
 * for that is routed first so that it carries a dst whose device is of
 * type ARPHRD_TUNNEL; otherwise nothing is sent.
 *
 * Returns the negative error from ip6_tnl_err(), else 0.
 */
static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
{
	int rel_msg = 0;
	u8 rel_type = type;
	u8 rel_code = code;
	__u32 rel_info = ntohl(info);
	const struct iphdr *eiph;
	struct sk_buff *skb2;
	struct rtable *rt;
	struct flowi4 fl4;
	int err;

	err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (!rel_msg)
		return 0;

	switch (rel_type) {
	case ICMPV6_DEST_UNREACH:
		if (rel_code != ICMPV6_ADDR_UNREACH)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_HOST_UNREACH;
		break;
	case ICMPV6_PKT_TOOBIG:
		if (rel_code != 0)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_FRAG_NEEDED;
		break;
	case NDISC_REDIRECT:
		rel_type = ICMP_REDIRECT;
		rel_code = ICMP_REDIR_HOST;
		/* fall through - as written, redirects are not relayed */
	default:
		return 0;
	}

	if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
		return 0;

	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (!skb2)
		return 0;

	skb_dst_drop(skb2);

	/* Make the clone start at the encapsulated IPv4 header */
	skb_pull(skb2, offset);
	skb_reset_network_header(skb2);
	eiph = ip_hdr(skb2);

	/* Try to guess incoming interface */
	rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
				   eiph->saddr, 0,
				   0, 0,
				   IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
	if (IS_ERR(rt))
		goto out;

	skb2->dev = rt->dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags & RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
					   eiph->daddr, eiph->saddr,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(eiph->tos), 0);
		if (IS_ERR(rt))
			goto out;
		if (rt->dst.dev->type != ARPHRD_TUNNEL) {
			ip_rt_put(rt);
			goto out;
		}
		skb_dst_set(skb2, &rt->dst);
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
				   skb2->dev) ||
		    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
			goto out;
	}

	/* change mtu on this route */
	if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
		if (rel_info > dst_mtu(skb_dst(skb2)))
			goto out;

		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
	}
	if (rel_type == ICMP_REDIRECT)
		skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);

	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));

out:
	kfree_skb(skb2);
	return 0;
}

643 644
/*
 * Error handler for IPv6-in-IPv6 tunnels.
 *
 * Lets ip6_tnl_err() decide whether the ICMPv6 error has to be relayed.
 * If so, a clone of @skb is advanced to the encapsulated IPv6 header at
 * @offset and an ICMPv6 message with the relay type/code/info is sent
 * for it.
 *
 * Returns the negative error from ip6_tnl_err(), else 0.
 */
static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
{
	int rel_msg = 0;
	u8 rel_type = type;
	u8 rel_code = code;
	__u32 rel_info = ntohl(info);
	struct sk_buff *skb2;
	struct rt6_info *rt;
	int err;

	err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (!rel_msg)
		return 0;
	if (!pskb_may_pull(skb, offset + sizeof(struct ipv6hdr)))
		return 0;

	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (!skb2)
		return 0;

	skb_dst_drop(skb2);
	skb_pull(skb2, offset);
	skb_reset_network_header(skb2);

	/* Try to guess incoming interface */
	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
			NULL, 0, 0);
	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	icmpv6_send(skb2, rel_type, rel_code, rel_info);

	ip6_rt_put(rt);
	kfree_skb(skb2);

	return 0;
}

686 687
static void ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
					const struct ipv6hdr *ipv6h,
688 689 690 691 692
					struct sk_buff *skb)
{
	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;

	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
693
		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
694 695

	if (INET_ECN_is_ce(dsfield))
696
		IP_ECN_set_ce(ip_hdr(skb));
697 698
}

699 700
static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
					const struct ipv6hdr *ipv6h,
701
					struct sk_buff *skb)
L
Linus Torvalds 已提交
702
{
703
	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
704
		ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
L
Linus Torvalds 已提交
705

706
	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
707
		IP6_ECN_set_ce(ipv6_hdr(skb));
L
Linus Torvalds 已提交
708
}
709

X
xeb@mail.ru 已提交
710
__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
711 712 713
			     const struct in6_addr *laddr,
			     const struct in6_addr *raddr)
{
X
xeb@mail.ru 已提交
714
	struct __ip6_tnl_parm *p = &t->parms;
715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731
	int ltype = ipv6_addr_type(laddr);
	int rtype = ipv6_addr_type(raddr);
	__u32 flags = 0;

	if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
		flags = IP6_TNL_F_CAP_PER_PACKET;
	} else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
		   (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
		if (ltype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_XMIT;
		if (rtype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_RCV;
	}
	return flags;
}
X
xeb@mail.ru 已提交
732
EXPORT_SYMBOL(ip6_tnl_get_cap);
733

E
Eric Dumazet 已提交
734
/* called with rcu_read_lock() */
X
xeb@mail.ru 已提交
735
int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
736 737
				  const struct in6_addr *laddr,
				  const struct in6_addr *raddr)
738
{
X
xeb@mail.ru 已提交
739
	struct __ip6_tnl_parm *p = &t->parms;
740
	int ret = 0;
741
	struct net *net = dev_net(t->dev);
742

743 744 745
	if ((p->flags & IP6_TNL_F_CAP_RCV) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
746
		struct net_device *ldev = NULL;
747 748

		if (p->link)
E
Eric Dumazet 已提交
749
			ldev = dev_get_by_index_rcu(net, p->link);
750

751 752 753
		if ((ipv6_addr_is_multicast(laddr) ||
		     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
		    likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
754 755 756 757
			ret = 1;
	}
	return ret;
}
X
xeb@mail.ru 已提交
758
EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
L
Linus Torvalds 已提交
759 760

/**
761
 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
L
Linus Torvalds 已提交
762
 *   @skb: received socket buffer
763 764
 *   @protocol: ethernet protocol ID
 *   @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
L
Linus Torvalds 已提交
765 766 767 768
 *
 * Return: 0
 **/

769
static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
770
		       __u8 ipproto,
771 772
		       void (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						    const struct ipv6hdr *ipv6h,
773
						    struct sk_buff *skb))
L
Linus Torvalds 已提交
774 775
{
	struct ip6_tnl *t;
776
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
L
Linus Torvalds 已提交
777

778
	rcu_read_lock();
L
Linus Torvalds 已提交
779

780
	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
781
					&ipv6h->daddr)) != NULL) {
E
Eric Dumazet 已提交
782 783
		struct pcpu_tstats *tstats;

784
		if (t->parms.proto != ipproto && t->parms.proto != 0) {
785
			rcu_read_unlock();
786 787 788
			goto discard;
		}

L
Linus Torvalds 已提交
789
		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
790
			rcu_read_unlock();
791
			goto discard;
L
Linus Torvalds 已提交
792 793
		}

794
		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
795
			t->dev->stats.rx_dropped++;
796
			rcu_read_unlock();
L
Linus Torvalds 已提交
797 798 799
			goto discard;
		}
		secpath_reset(skb);
800
		skb->mac_header = skb->network_header;
801
		skb_reset_network_header(skb);
802
		skb->protocol = htons(protocol);
L
Linus Torvalds 已提交
803 804
		skb->pkt_type = PACKET_HOST;
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
805

E
Eric Dumazet 已提交
806 807 808 809 810
		tstats = this_cpu_ptr(t->dev->tstats);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;

		__skb_tunnel_rx(skb, t->dev);
811

812
		dscp_ecn_decapsulate(t, ipv6h, skb);
E
Eric Dumazet 已提交
813

814
		netif_rx(skb);
E
Eric Dumazet 已提交
815

816
		rcu_read_unlock();
L
Linus Torvalds 已提交
817 818
		return 0;
	}
819
	rcu_read_unlock();
L
Linus Torvalds 已提交
820
	return 1;
821 822 823 824

discard:
	kfree_skb(skb);
	return 0;
L
Linus Torvalds 已提交
825 826
}

827 828
static int ip4ip6_rcv(struct sk_buff *skb)
{
829 830
	return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
			   ip4ip6_dscp_ecn_decapsulate);
831 832
}

833 834
static int ip6ip6_rcv(struct sk_buff *skb)
{
835 836
	return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
			   ip6ip6_dscp_ecn_decapsulate);
837 838
}

839 840 841 842
/*
 * Transmit options carrying a tunnel encapsulation limit: the generic
 * option descriptor plus the 8 bytes of storage that init_tel_txopt()
 * fills in and points ops.dst0opt at.
 */
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
};
L
Linus Torvalds 已提交
843

844 845 846
/*
 * Zero @opt and build an 8 byte destination options header in
 * opt->dst_opt: bytes 0-1 stay zero, bytes 2-4 hold a tunnel encapsulation
 * limit option (type, length 1, @encap_limit), bytes 5-7 a PadN option
 * with length 1.  The header is attached as ops.dst0opt and
 * ops.opt_nflen is set to 8.
 */
static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
{
	__u8 *tlv = opt->dst_opt;

	memset(opt, 0, sizeof(*opt));

	tlv[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
	tlv[3] = 1;
	tlv[4] = encap_limit;
	tlv[5] = IPV6_TLV_PADN;
	tlv[6] = 1;

	opt->ops.dst0opt = (struct ipv6_opt_hdr *) tlv;
	opt->ops.opt_nflen = 8;
}

/**
859
 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
L
Linus Torvalds 已提交
860
 *   @t: the outgoing tunnel device
861
 *   @hdr: IPv6 header from the incoming packet
L
Linus Torvalds 已提交
862 863
 *
 * Description:
864
 *   Avoid trivial tunneling loop by checking that tunnel exit-point
L
Linus Torvalds 已提交
865 866
 *   doesn't match source of incoming packet.
 *
867
 * Return:
L
Linus Torvalds 已提交
868 869 870 871
 *   1 if conflict,
 *   0 else
 **/

E
Eric Dumazet 已提交
872
static inline bool
873
ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
L
Linus Torvalds 已提交
874 875 876 877
{
	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}

X
xeb@mail.ru 已提交
878
int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
879
{
X
xeb@mail.ru 已提交
880
	struct __ip6_tnl_parm *p = &t->parms;
881
	int ret = 0;
882
	struct net *net = dev_net(t->dev);
883

884
	if (p->flags & IP6_TNL_F_CAP_XMIT) {
885 886
		struct net_device *ldev = NULL;

E
Eric Dumazet 已提交
887
		rcu_read_lock();
888
		if (p->link)
E
Eric Dumazet 已提交
889
			ldev = dev_get_by_index_rcu(net, p->link);
890

891
		if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
892 893
			pr_warn("%s xmit: Local address not yet configured!\n",
				p->name);
894
		else if (!ipv6_addr_is_multicast(&p->raddr) &&
895
			 unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
896 897
			pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
				p->name);
898 899
		else
			ret = 1;
E
Eric Dumazet 已提交
900
		rcu_read_unlock();
901 902 903
	}
	return ret;
}
X
xeb@mail.ru 已提交
904 905
EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);

L
Linus Torvalds 已提交
906
/**
907
 * ip6_tnl_xmit2 - encapsulate packet and send
L
Linus Torvalds 已提交
908
 *   @skb: the outgoing socket buffer
909
 *   @dev: the outgoing tunnel device
910 911 912 913
 *   @dsfield: dscp code for outer header
 *   @fl: flow of tunneled packet
 *   @encap_limit: encapsulation limit
 *   @pmtu: Path MTU is stored if packet is too big
L
Linus Torvalds 已提交
914 915 916 917 918
 *
 * Description:
 *   Build new header and do some sanity checks on the packet before sending
 *   it.
 *
919
 * Return:
920
 *   0 on success
921 922
 *   -1 fail
 *   %-EMSGSIZE message too big. return mtu in this case.
L
Linus Torvalds 已提交
923 924
 **/

925 926 927
static int ip6_tnl_xmit2(struct sk_buff *skb,
			 struct net_device *dev,
			 __u8 dsfield,
928
			 struct flowi6 *fl6,
929 930
			 int encap_limit,
			 __u32 *pmtu)
L
Linus Torvalds 已提交
931
{
A
Alexey Dobriyan 已提交
932
	struct net *net = dev_net(dev);
933
	struct ip6_tnl *t = netdev_priv(dev);
934
	struct net_device_stats *stats = &t->dev->stats;
935
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
936
	struct ipv6_tel_txoption opt;
937
	struct dst_entry *dst = NULL, *ndst = NULL;
L
Linus Torvalds 已提交
938 939
	struct net_device *tdev;
	int mtu;
940
	unsigned int max_headroom = sizeof(struct ipv6hdr);
L
Linus Torvalds 已提交
941
	u8 proto;
942
	int err = -1;
L
Linus Torvalds 已提交
943 944
	int pkt_len;

945 946
	if (!fl6->flowi6_mark)
		dst = ip6_tnl_dst_check(t);
947 948
	if (!dst) {
		ndst = ip6_route_output(net, NULL, fl6);
L
Linus Torvalds 已提交
949

950
		if (ndst->error)
951
			goto tx_err_link_failure;
952 953 954 955
		ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
		if (IS_ERR(ndst)) {
			err = PTR_ERR(ndst);
			ndst = NULL;
956 957
			goto tx_err_link_failure;
		}
958
		dst = ndst;
959
	}
L
Linus Torvalds 已提交
960 961 962 963 964

	tdev = dst->dev;

	if (tdev == dev) {
		stats->collisions++;
965 966
		net_warn_ratelimited("%s: Local routing loop detected!\n",
				     t->parms.name);
L
Linus Torvalds 已提交
967 968 969
		goto tx_err_dst_release;
	}
	mtu = dst_mtu(dst) - sizeof (*ipv6h);
970
	if (encap_limit >= 0) {
L
Linus Torvalds 已提交
971 972 973 974 975
		max_headroom += 8;
		mtu -= 8;
	}
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
E
Eric Dumazet 已提交
976
	if (skb_dst(skb))
977
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
L
Linus Torvalds 已提交
978
	if (skb->len > mtu) {
979 980
		*pmtu = mtu;
		err = -EMSGSIZE;
L
Linus Torvalds 已提交
981 982 983 984 985 986 987
		goto tx_err_dst_release;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom += LL_RESERVED_SPACE(tdev);
988

989 990
	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
L
Linus Torvalds 已提交
991
		struct sk_buff *new_skb;
992

L
Linus Torvalds 已提交
993 994 995 996 997
		if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
			goto tx_err_dst_release;

		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
998
		consume_skb(skb);
L
Linus Torvalds 已提交
999 1000
		skb = new_skb;
	}
E
Eric Dumazet 已提交
1001
	skb_dst_drop(skb);
1002 1003 1004 1005 1006 1007
	if (fl6->flowi6_mark) {
		skb_dst_set(skb, dst);
		ndst = NULL;
	} else {
		skb_dst_set_noref(skb, dst);
	}
1008
	skb->transport_header = skb->network_header;
L
Linus Torvalds 已提交
1009

1010
	proto = fl6->flowi6_proto;
1011 1012 1013 1014
	if (encap_limit >= 0) {
		init_tel_txopt(&opt, encap_limit);
		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
	}
1015 1016
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
1017
	ipv6h = ipv6_hdr(skb);
1018
	*(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000);
L
Linus Torvalds 已提交
1019 1020 1021 1022
	dsfield = INET_ECN_encapsulate(0, dsfield);
	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
	ipv6h->hop_limit = t->parms.hop_limit;
	ipv6h->nexthdr = proto;
A
Alexey Dobriyan 已提交
1023 1024
	ipv6h->saddr = fl6->saddr;
	ipv6h->daddr = fl6->daddr;
L
Linus Torvalds 已提交
1025 1026
	nf_reset(skb);
	pkt_len = skb->len;
H
Herbert Xu 已提交
1027
	err = ip6_local_out(skb);
L
Linus Torvalds 已提交
1028

1029
	if (net_xmit_eval(err) == 0) {
E
Eric Dumazet 已提交
1030 1031 1032 1033
		struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats);

		tstats->tx_bytes += pkt_len;
		tstats->tx_packets++;
L
Linus Torvalds 已提交
1034 1035 1036 1037
	} else {
		stats->tx_errors++;
		stats->tx_aborted_errors++;
	}
1038 1039
	if (ndst)
		ip6_tnl_dst_store(t, ndst);
L
Linus Torvalds 已提交
1040 1041 1042 1043 1044
	return 0;
tx_err_link_failure:
	stats->tx_carrier_errors++;
	dst_link_failure(skb);
tx_err_dst_release:
1045
	dst_release(ndst);
1046 1047 1048
	return err;
}

1049 1050 1051 1052
static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
1053
	const struct iphdr  *iph = ip_hdr(skb);
1054
	int encap_limit = -1;
1055
	struct flowi6 fl6;
1056 1057 1058 1059
	__u8 dsfield;
	__u32 mtu;
	int err;

1060 1061
	if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
	    !ip6_tnl_xmit_ctl(t))
1062 1063 1064 1065 1066
		return -1;

	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		encap_limit = t->parms.encap_limit;

1067 1068
	memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
	fl6.flowi6_proto = IPPROTO_IPIP;
1069 1070 1071

	dsfield = ipv4_get_dsfield(iph);

1072
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
1073
		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
A
Al Viro 已提交
1074
					  & IPV6_TCLASS_MASK;
1075 1076
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
		fl6.flowi6_mark = skb->mark;
1077

1078
	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089
	if (err != 0) {
		/* XXX: send ICMP error even if DF is not set. */
		if (err == -EMSGSIZE)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
		return -1;
	}

	return 0;
}

1090 1091 1092 1093
static inline int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
1094
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1095 1096
	int encap_limit = -1;
	__u16 offset;
1097
	struct flowi6 fl6;
1098 1099 1100 1101
	__u8 dsfield;
	__u32 mtu;
	int err;

1102 1103
	if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
	    !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
1104 1105
		return -1;

X
xeb@mail.ru 已提交
1106
	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
1107
	if (offset > 0) {
1108
		struct ipv6_tlv_tnl_enc_lim *tel;
1109
		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
1110 1111
		if (tel->encap_limit == 0) {
			icmpv6_send(skb, ICMPV6_PARAMPROB,
1112
				    ICMPV6_HDR_FIELD, offset + 2);
1113 1114 1115 1116 1117 1118
			return -1;
		}
		encap_limit = tel->encap_limit - 1;
	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		encap_limit = t->parms.encap_limit;

1119 1120
	memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6));
	fl6.flowi6_proto = IPPROTO_IPV6;
1121 1122

	dsfield = ipv6_get_dsfield(ipv6h);
1123
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
1124
		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
1125
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
1126
		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
1127 1128
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
		fl6.flowi6_mark = skb->mark;
1129

1130
	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
1131 1132
	if (err != 0) {
		if (err == -EMSGSIZE)
1133
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1134 1135 1136 1137 1138 1139
		return -1;
	}

	return 0;
}

1140
static netdev_tx_t
1141 1142 1143
ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
1144
	struct net_device_stats *stats = &t->dev->stats;
1145 1146 1147
	int ret;

	switch (skb->protocol) {
1148
	case htons(ETH_P_IP):
1149 1150
		ret = ip4ip6_tnl_xmit(skb, dev);
		break;
1151
	case htons(ETH_P_IPV6):
1152 1153 1154 1155 1156 1157 1158 1159 1160
		ret = ip6ip6_tnl_xmit(skb, dev);
		break;
	default:
		goto tx_err;
	}

	if (ret < 0)
		goto tx_err;

1161
	return NETDEV_TX_OK;
1162

L
Linus Torvalds 已提交
1163 1164 1165 1166
tx_err:
	stats->tx_errors++;
	stats->tx_dropped++;
	kfree_skb(skb);
1167
	return NETDEV_TX_OK;
L
Linus Torvalds 已提交
1168 1169
}

1170
static void ip6_tnl_link_config(struct ip6_tnl *t)
L
Linus Torvalds 已提交
1171 1172
{
	struct net_device *dev = t->dev;
X
xeb@mail.ru 已提交
1173
	struct __ip6_tnl_parm *p = &t->parms;
1174
	struct flowi6 *fl6 = &t->fl.u.ip6;
L
Linus Torvalds 已提交
1175

1176 1177
	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
L
Linus Torvalds 已提交
1178 1179

	/* Set up flowi template */
A
Alexey Dobriyan 已提交
1180 1181
	fl6->saddr = p->laddr;
	fl6->daddr = p->raddr;
1182 1183
	fl6->flowi6_oif = p->link;
	fl6->flowlabel = 0;
L
Linus Torvalds 已提交
1184 1185

	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1186
		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
L
Linus Torvalds 已提交
1187
	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1188
		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
L
Linus Torvalds 已提交
1189

1190 1191
	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
L
Linus Torvalds 已提交
1192 1193 1194 1195 1196 1197 1198 1199 1200

	if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
		dev->flags |= IFF_POINTOPOINT;
	else
		dev->flags &= ~IFF_POINTOPOINT;

	dev->iflink = p->link;

	if (p->flags & IP6_TNL_F_CAP_XMIT) {
1201 1202 1203
		int strict = (ipv6_addr_type(&p->raddr) &
			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));

1204 1205
		struct rt6_info *rt = rt6_lookup(dev_net(dev),
						 &p->raddr, &p->laddr,
1206
						 p->link, strict);
L
Linus Torvalds 已提交
1207 1208 1209 1210

		if (rt == NULL)
			return;

1211 1212
		if (rt->dst.dev) {
			dev->hard_header_len = rt->dst.dev->hard_header_len +
L
Linus Torvalds 已提交
1213 1214
				sizeof (struct ipv6hdr);

1215
			dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr);
1216 1217
			if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
				dev->mtu-=8;
L
Linus Torvalds 已提交
1218 1219 1220 1221

			if (dev->mtu < IPV6_MIN_MTU)
				dev->mtu = IPV6_MIN_MTU;
		}
A
Amerigo Wang 已提交
1222
		ip6_rt_put(rt);
L
Linus Torvalds 已提交
1223 1224 1225 1226
	}
}

/**
1227
 * ip6_tnl_change - update the tunnel parameters
L
Linus Torvalds 已提交
1228 1229 1230 1231
 *   @t: tunnel to be changed
 *   @p: tunnel configuration parameters
 *
 * Description:
1232
 *   ip6_tnl_change() updates the tunnel parameters
L
Linus Torvalds 已提交
1233 1234 1235
 **/

static int
X
xeb@mail.ru 已提交
1236
ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
L
Linus Torvalds 已提交
1237
{
A
Alexey Dobriyan 已提交
1238 1239
	t->parms.laddr = p->laddr;
	t->parms.raddr = p->raddr;
L
Linus Torvalds 已提交
1240 1241 1242 1243
	t->parms.flags = p->flags;
	t->parms.hop_limit = p->hop_limit;
	t->parms.encap_limit = p->encap_limit;
	t->parms.flowinfo = p->flowinfo;
1244
	t->parms.link = p->link;
1245
	t->parms.proto = p->proto;
1246
	ip6_tnl_dst_reset(t);
1247
	ip6_tnl_link_config(t);
L
Linus Torvalds 已提交
1248 1249 1250
	return 0;
}

1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264
/*
 * ip6_tnl_update - apply new parameters to a linked tunnel
 *
 * The tunnel is unlinked, synchronize_net() is called, the parameters are
 * changed, and the tunnel is linked again before netdev_state_change() is
 * called for the device.  Returns the result of ip6_tnl_change().
 */
static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
	struct ip6_tnl_net *ip6n = net_generic(dev_net(t->dev),
					       ip6_tnl_net_id);
	int ret;

	ip6_tnl_unlink(ip6n, t);
	synchronize_net();

	ret = ip6_tnl_change(t, p);

	ip6_tnl_link(ip6n, t);
	netdev_state_change(t->dev);

	return ret;
}

X
xeb@mail.ru 已提交
1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292
/*
 * ip6_tnl_parm_from_user - convert the ioctl parameter layout @u into the
 * internal parameter structure @p.  Only the fields listed here are
 * written; any other member of @p is left as it was.
 */
static void
ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
{
	memcpy(p->name, u->name, sizeof(u->name));

	p->link = u->link;
	p->proto = u->proto;
	p->laddr = u->laddr;
	p->raddr = u->raddr;

	p->flags = u->flags;
	p->flowinfo = u->flowinfo;
	p->hop_limit = u->hop_limit;
	p->encap_limit = u->encap_limit;
}

/*
 * ip6_tnl_parm_to_user - convert the internal parameter structure @p into
 * the ioctl parameter layout @u.  Only the fields listed here are written.
 */
static void
ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
{
	memcpy(u->name, p->name, sizeof(u->name));

	u->link = p->link;
	u->proto = p->proto;
	u->laddr = p->laddr;
	u->raddr = p->raddr;

	u->flags = p->flags;
	u->flowinfo = p->flowinfo;
	u->hop_limit = p->hop_limit;
	u->encap_limit = p->encap_limit;
}

L
Linus Torvalds 已提交
1293
/**
1294
 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
L
Linus Torvalds 已提交
1295 1296 1297 1298 1299
 *   @dev: virtual device associated with tunnel
 *   @ifr: parameters passed from userspace
 *   @cmd: command to be performed
 *
 * Description:
1300
 *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
1301
 *   from userspace.
L
Linus Torvalds 已提交
1302 1303 1304 1305 1306 1307 1308
 *
 *   The possible commands are the following:
 *     %SIOCGETTUNNEL: get tunnel parameters for device
 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
 *     %SIOCDELTUNNEL: delete tunnel
 *
1309
 *   The fallback device "ip6tnl0", created during module
L
Linus Torvalds 已提交
1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321
 *   initialization, can be used for creating other tunnel devices.
 *
 * Return:
 *   0 on success,
 *   %-EFAULT if unable to copy data to or from userspace,
 *   %-EPERM if current process hasn't %CAP_NET_ADMIN set
 *   %-EINVAL if passed tunnel parameters are invalid,
 *   %-EEXIST if changing a tunnel's parameters would cause a conflict
 *   %-ENODEV if attempting to change or delete a nonexisting device
 **/

static int
1322
ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
L
Linus Torvalds 已提交
1323 1324 1325
{
	int err = 0;
	struct ip6_tnl_parm p;
X
xeb@mail.ru 已提交
1326
	struct __ip6_tnl_parm p1;
L
Linus Torvalds 已提交
1327
	struct ip6_tnl *t = NULL;
1328 1329
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
L
Linus Torvalds 已提交
1330 1331 1332

	switch (cmd) {
	case SIOCGETTUNNEL:
1333
		if (dev == ip6n->fb_tnl_dev) {
1334
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
L
Linus Torvalds 已提交
1335 1336 1337
				err = -EFAULT;
				break;
			}
X
xeb@mail.ru 已提交
1338 1339
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
1340 1341
		} else {
			memset(&p, 0, sizeof(p));
1342 1343
		}
		if (t == NULL)
1344
			t = netdev_priv(dev);
X
xeb@mail.ru 已提交
1345
		ip6_tnl_parm_to_user(&p, &t->parms);
L
Linus Torvalds 已提交
1346 1347 1348 1349 1350 1351 1352 1353 1354
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
			err = -EFAULT;
		}
		break;
	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			break;
1355 1356
		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
L
Linus Torvalds 已提交
1357
			break;
1358
		err = -EINVAL;
1359 1360
		if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
		    p.proto != 0)
L
Linus Torvalds 已提交
1361
			break;
X
xeb@mail.ru 已提交
1362 1363
		ip6_tnl_parm_from_user(&p1, &p);
		t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
1364
		if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
1365 1366 1367 1368 1369 1370 1371 1372
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else
				t = netdev_priv(dev);

1373
			err = ip6_tnl_update(t, &p1);
L
Linus Torvalds 已提交
1374
		}
1375
		if (t) {
L
Linus Torvalds 已提交
1376
			err = 0;
X
xeb@mail.ru 已提交
1377 1378
			ip6_tnl_parm_to_user(&p, &t->parms);
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1379 1380 1381 1382
				err = -EFAULT;

		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
L
Linus Torvalds 已提交
1383 1384 1385 1386 1387 1388
		break;
	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			break;

1389
		if (dev == ip6n->fb_tnl_dev) {
1390 1391
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
L
Linus Torvalds 已提交
1392
				break;
1393
			err = -ENOENT;
X
xeb@mail.ru 已提交
1394 1395 1396
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			if (t == NULL)
L
Linus Torvalds 已提交
1397
				break;
1398
			err = -EPERM;
1399
			if (t->dev == ip6n->fb_tnl_dev)
L
Linus Torvalds 已提交
1400
				break;
1401
			dev = t->dev;
L
Linus Torvalds 已提交
1402
		}
1403 1404
		err = 0;
		unregister_netdevice(dev);
L
Linus Torvalds 已提交
1405 1406 1407 1408 1409 1410 1411 1412
		break;
	default:
		err = -EINVAL;
	}
	return err;
}

/**
1413
 * ip6_tnl_change_mtu - change mtu manually for tunnel device
L
Linus Torvalds 已提交
1414 1415 1416 1417 1418 1419 1420 1421 1422
 *   @dev: virtual device associated with tunnel
 *   @new_mtu: the new mtu
 *
 * Return:
 *   0 on success,
 *   %-EINVAL if mtu too small
 **/

static int
1423
ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
L
Linus Torvalds 已提交
1424 1425 1426 1427 1428 1429 1430 1431
{
	if (new_mtu < IPV6_MIN_MTU) {
		return -EINVAL;
	}
	dev->mtu = new_mtu;
	return 0;
}

1432 1433

static const struct net_device_ops ip6_tnl_netdev_ops = {
E
Eric Dumazet 已提交
1434
	.ndo_uninit	= ip6_tnl_dev_uninit,
1435
	.ndo_start_xmit = ip6_tnl_xmit,
E
Eric Dumazet 已提交
1436
	.ndo_do_ioctl	= ip6_tnl_ioctl,
1437
	.ndo_change_mtu = ip6_tnl_change_mtu,
E
Eric Dumazet 已提交
1438
	.ndo_get_stats	= ip6_get_stats,
1439 1440
};

E
Eric Dumazet 已提交
1441

L
Linus Torvalds 已提交
1442
/**
1443
 * ip6_tnl_dev_setup - setup virtual tunnel device
L
Linus Torvalds 已提交
1444 1445 1446 1447 1448 1449
 *   @dev: virtual device associated with tunnel
 *
 * Description:
 *   Initialize function pointers and device parameters
 **/

1450
static void ip6_tnl_dev_setup(struct net_device *dev)
L
Linus Torvalds 已提交
1451
{
1452 1453
	struct ip6_tnl *t;

1454
	dev->netdev_ops = &ip6_tnl_netdev_ops;
E
Eric Dumazet 已提交
1455
	dev->destructor = ip6_dev_free;
L
Linus Torvalds 已提交
1456 1457 1458 1459

	dev->type = ARPHRD_TUNNEL6;
	dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
	dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
1460 1461 1462
	t = netdev_priv(dev);
	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		dev->mtu-=8;
L
Linus Torvalds 已提交
1463 1464
	dev->flags |= IFF_NOARP;
	dev->addr_len = sizeof(struct in6_addr);
1465
	dev->features |= NETIF_F_NETNS_LOCAL;
1466
	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
L
Linus Torvalds 已提交
1467 1468 1469 1470
}


/**
1471
 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
L
Linus Torvalds 已提交
1472 1473 1474
 *   @dev: virtual device associated with tunnel
 **/

E
Eric Dumazet 已提交
1475
static inline int
1476
ip6_tnl_dev_init_gen(struct net_device *dev)
L
Linus Torvalds 已提交
1477
{
1478
	struct ip6_tnl *t = netdev_priv(dev);
E
Eric Dumazet 已提交
1479

L
Linus Torvalds 已提交
1480
	t->dev = dev;
E
Eric Dumazet 已提交
1481 1482 1483 1484
	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;
	return 0;
L
Linus Torvalds 已提交
1485 1486 1487
}

/**
 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
 *   @dev: virtual device associated with tunnel
 *
 * Return:
 *   0 on success, otherwise the error from ip6_tnl_dev_init_gen()
 **/

static int ip6_tnl_dev_init(struct net_device *dev)
{
	int err;

	err = ip6_tnl_dev_init_gen(dev);
	if (!err)
		ip6_tnl_link_config(netdev_priv(dev));

	return err;
}

/**
1504
 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
L
Linus Torvalds 已提交
1505 1506 1507 1508 1509
 *   @dev: fallback device
 *
 * Return: 0
 **/

E
Eric Dumazet 已提交
1510
static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
L
Linus Torvalds 已提交
1511
{
1512
	struct ip6_tnl *t = netdev_priv(dev);
1513 1514
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
E
Eric Dumazet 已提交
1515 1516 1517 1518
	int err = ip6_tnl_dev_init_gen(dev);

	if (err)
		return err;
1519

1520
	t->parms.proto = IPPROTO_IPV6;
L
Linus Torvalds 已提交
1521
	dev_hold(dev);
1522 1523 1524

	ip6_tnl_link_config(t);

1525
	rcu_assign_pointer(ip6n->tnls_wc[0], t);
E
Eric Dumazet 已提交
1526
	return 0;
L
Linus Torvalds 已提交
1527 1528
}

1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618
/**
 * ip6_tnl_validate - validate netlink attributes for an ip6tnl link
 *   @tb: generic link attributes (unused)
 *   @data: IFLA_IPTUN_* attributes, may be NULL
 *
 * Description:
 *   Only the tunnel protocol is checked.  The protocol attribute is
 *   optional (ip6_tnl_netlink_parms() leaves the protocol at 0 when it
 *   is absent), so a missing attribute is accepted rather than
 *   dereferenced.
 *
 * Return:
 *   0 if the attributes are acceptable,
 *   %-EINVAL if the protocol is neither IPv6, IPIP nor 0
 **/
static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
{
	u8 proto;

	/* nla_get_u8() must not be handed a NULL attribute. */
	if (!data || !data[IFLA_IPTUN_PROTO])
		return 0;

	proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
	if (proto != IPPROTO_IPV6 &&
	    proto != IPPROTO_IPIP &&
	    proto != 0)
		return -EINVAL;

	return 0;
}

static void ip6_tnl_netlink_parms(struct nlattr *data[],
				  struct __ip6_tnl_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	if (!data)
		return;

	if (data[IFLA_IPTUN_LINK])
		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);

	if (data[IFLA_IPTUN_LOCAL])
		nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL],
			   sizeof(struct in6_addr));

	if (data[IFLA_IPTUN_REMOTE])
		nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE],
			   sizeof(struct in6_addr));

	if (data[IFLA_IPTUN_TTL])
		parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);

	if (data[IFLA_IPTUN_ENCAP_LIMIT])
		parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);

	if (data[IFLA_IPTUN_FLOWINFO])
		parms->flowinfo = nla_get_u32(data[IFLA_IPTUN_FLOWINFO]);

	if (data[IFLA_IPTUN_FLAGS])
		parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);

	if (data[IFLA_IPTUN_PROTO])
		parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
}

static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
			   struct nlattr *tb[], struct nlattr *data[])
{
	struct net *net = dev_net(dev);
	struct ip6_tnl *nt;

	nt = netdev_priv(dev);
	ip6_tnl_netlink_parms(data, &nt->parms);

	if (ip6_tnl_locate(net, &nt->parms, 0))
		return -EEXIST;

	return ip6_tnl_create2(dev);
}

static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
			      struct nlattr *data[])
{
	struct ip6_tnl *t;
	struct __ip6_tnl_parm p;
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	if (dev == ip6n->fb_tnl_dev)
		return -EINVAL;

	ip6_tnl_netlink_parms(data, &p);

	t = ip6_tnl_locate(net, &p, 0);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else
		t = netdev_priv(dev);

	return ip6_tnl_update(t, &p);
}

1619
static size_t ip6_tnl_get_size(const struct net_device *dev)
1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635
{
	return
		/* IFLA_IPTUN_LINK */
		nla_total_size(4) +
		/* IFLA_IPTUN_LOCAL */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_IPTUN_REMOTE */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_IPTUN_TTL */
		nla_total_size(1) +
		/* IFLA_IPTUN_ENCAP_LIMIT */
		nla_total_size(1) +
		/* IFLA_IPTUN_FLOWINFO */
		nla_total_size(4) +
		/* IFLA_IPTUN_FLAGS */
		nla_total_size(4) +
1636 1637
		/* IFLA_IPTUN_PROTO */
		nla_total_size(1) +
1638 1639 1640
		0;
}

1641
static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653
{
	struct ip6_tnl *tunnel = netdev_priv(dev);
	struct __ip6_tnl_parm *parm = &tunnel->parms;

	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
	    nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
		    &parm->raddr) ||
	    nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
		    &parm->laddr) ||
	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
	    nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
	    nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
1654 1655
	    nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
1656 1657 1658 1659 1660 1661 1662
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673
/* Netlink attribute policy referenced by ip6_link_ops.policy */
static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
	/* IPv6 addresses, described by length only */
	[IFLA_IPTUN_LOCAL]		= { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_REMOTE]		= { .len = sizeof(struct in6_addr) },
	/* 32-bit values */
	[IFLA_IPTUN_LINK]		= { .type = NLA_U32 },
	[IFLA_IPTUN_FLOWINFO]		= { .type = NLA_U32 },
	[IFLA_IPTUN_FLAGS]		= { .type = NLA_U32 },
	/* 8-bit values */
	[IFLA_IPTUN_TTL]		= { .type = NLA_U8 },
	[IFLA_IPTUN_ENCAP_LIMIT]	= { .type = NLA_U8 },
	[IFLA_IPTUN_PROTO]		= { .type = NLA_U8 },
};

1674 1675 1676
/* rtnetlink link type "ip6tnl", registered in ip6_tunnel_init() */
static struct rtnl_link_ops ip6_link_ops __read_mostly = {
	.kind		= "ip6tnl",
	.priv_size	= sizeof(struct ip6_tnl),
	.setup		= ip6_tnl_dev_setup,
	/* attribute parsing */
	.maxtype	= IFLA_IPTUN_MAX,
	.policy		= ip6_tnl_policy,
	.validate	= ip6_tnl_validate,
	/* link creation and modification */
	.newlink	= ip6_tnl_newlink,
	.changelink	= ip6_tnl_changelink,
	/* dumping */
	.get_size	= ip6_tnl_get_size,
	.fill_info	= ip6_tnl_fill_info,
};

1687
/* Receive and error handlers for IPv4-in-IPv6, registered for AF_INET */
static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
	.handler	= ip4ip6_rcv,
	.err_handler	= ip4ip6_err,
	.priority	= 1,
};

1693
/* Receive and error handlers for IPv6-in-IPv6, registered for AF_INET6 */
static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
	.handler	= ip6ip6_rcv,
	.err_handler	= ip6ip6_err,
	.priority	= 1,
};

1699
/*
 * ip6_tnl_destroy_tunnels - unregister every tunnel device of a namespace
 *
 * Queues each tunnel on the tnls_r_l hash chains, then the tunnel stored
 * in tnls_wc[0] (set by ip6_fb_tnl_dev_init() to the fallback tunnel), and
 * unregisters them all in one batch.  The rtnl_dereference() accessors
 * imply the caller holds the RTNL lock.
 */
static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
{
	struct ip6_tnl *tnl;
	int bucket;
	LIST_HEAD(kill_list);

	for (bucket = 0; bucket < HASH_SIZE; bucket++) {
		for (tnl = rtnl_dereference(ip6n->tnls_r_l[bucket]);
		     tnl != NULL;
		     tnl = rtnl_dereference(tnl->next))
			unregister_netdevice_queue(tnl->dev, &kill_list);
	}

	tnl = rtnl_dereference(ip6n->tnls_wc[0]);
	unregister_netdevice_queue(tnl->dev, &kill_list);

	unregister_netdevice_many(&kill_list);
}

1718
/*
 * ip6_tnl_init_net - per-namespace initialization
 *
 * Wires up the tunnel hash tables, then allocates, initializes and
 * registers the fallback device "ip6tnl0" and records the registered
 * device name in its tunnel parameters.
 *
 * Returns 0 on success, -ENOMEM if the device cannot be allocated, or the
 * error from ip6_fb_tnl_dev_init() / register_netdev(); in the latter two
 * cases the device is released through ip6_dev_free().
 */
static int __net_init ip6_tnl_init_net(struct net *net)
{
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct ip6_tnl *fb;
	int err;

	ip6n->tnls[0] = ip6n->tnls_wc;
	ip6n->tnls[1] = ip6n->tnls_r_l;

	ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
					ip6_tnl_dev_setup);
	if (!ip6n->fb_tnl_dev)
		return -ENOMEM;

	dev_net_set(ip6n->fb_tnl_dev, net);

	err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
	if (err >= 0)
		err = register_netdev(ip6n->fb_tnl_dev);
	if (err < 0) {
		/* both failure points share the same cleanup */
		ip6_dev_free(ip6n->fb_tnl_dev);
		return err;
	}

	fb = netdev_priv(ip6n->fb_tnl_dev);
	strcpy(fb->parms.name, ip6n->fb_tnl_dev->name);

	return 0;
}

1754
/*
 * ip6_tnl_exit_net - per-namespace teardown: destroy all tunnels of @net
 * while holding the RTNL lock.
 */
static void __net_exit ip6_tnl_exit_net(struct net *net)
{
	struct ip6_tnl_net *ip6n;

	ip6n = net_generic(net, ip6_tnl_net_id);

	rtnl_lock();
	ip6_tnl_destroy_tunnels(ip6n);
	rtnl_unlock();
}

static struct pernet_operations ip6_tnl_net_ops = {
	.init = ip6_tnl_init_net,
	.exit = ip6_tnl_exit_net,
1766 1767
	.id   = &ip6_tnl_net_id,
	.size = sizeof(struct ip6_tnl_net),
1768 1769
};

L
Linus Torvalds 已提交
1770 1771 1772 1773 1774 1775 1776 1777 1778 1779
/**
 * ip6_tunnel_init - register protocol and reserve needed resources
 *
 * Description:
 *   Registers, in order, the per-net operations, the IPv4-in-IPv6 and
 *   IPv6-in-IPv6 handlers and the rtnetlink link type.  A failure undoes
 *   the registrations already made, in reverse order.
 *
 * Return: 0 on success, otherwise the error of the failed registration
 **/

static int __init ip6_tunnel_init(void)
{
	int err;

	err = register_pernet_device(&ip6_tnl_net_ops);
	if (err < 0)
		goto out;

	err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
	if (err < 0) {
		pr_err("%s: can't register ip4ip6\n", __func__);
		goto undo_pernet;
	}

	err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
	if (err < 0) {
		pr_err("%s: can't register ip6ip6\n", __func__);
		goto undo_ip4ip6;
	}

	err = rtnl_link_register(&ip6_link_ops);
	if (err < 0)
		goto undo_ip6ip6;

	return 0;

undo_ip6ip6:
	xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
undo_ip4ip6:
	xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
undo_pernet:
	unregister_pernet_device(&ip6_tnl_net_ops);
out:
	return err;
}

/**
 * ip6_tunnel_cleanup - free resources and unregister protocol
 *
 * Description:
 *   Unregisters the rtnetlink link type, both tunnel handlers and
 *   finally the per-net operations.  A handler that fails to deregister
 *   is only logged.
 **/

static void __exit ip6_tunnel_cleanup(void)
{
	int err;

	rtnl_link_unregister(&ip6_link_ops);

	err = xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
	if (err)
		pr_info("%s: can't deregister ip4ip6\n", __func__);

	err = xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
	if (err)
		pr_info("%s: can't deregister ip6ip6\n", __func__);

	unregister_pernet_device(&ip6_tnl_net_ops);
}

/* Module entry and exit points. */
module_init(ip6_tunnel_init);
module_exit(ip6_tunnel_cleanup);