/*
 *	IPv6 tunneling device
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
 *	Yasuyuki Kozakai	<kozakai@linux-ipv6.org>
 *
 *      Based on:
 *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
 *
 *      RFC 2473
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 */

21 22
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

L
Linus Torvalds 已提交
23
#include <linux/module.h>
24
#include <linux/capability.h>
L
Linus Torvalds 已提交
25 26 27
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sockios.h>
28
#include <linux/icmp.h>
L
Linus Torvalds 已提交
29 30 31 32 33 34 35 36 37 38 39 40
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/route.h>
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
41
#include <linux/slab.h>
E
Eric Dumazet 已提交
42
#include <linux/hash.h>
43
#include <linux/etherdevice.h>
L
Linus Torvalds 已提交
44 45

#include <asm/uaccess.h>
A
Arun Sharma 已提交
46
#include <linux/atomic.h>
L
Linus Torvalds 已提交
47

48
#include <net/icmp.h>
L
Linus Torvalds 已提交
49
#include <net/ip.h>
50
#include <net/ip_tunnels.h>
L
Linus Torvalds 已提交
51 52 53 54 55 56 57
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/ip6_tunnel.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
58 59
#include <net/net_namespace.h>
#include <net/netns/generic.h>
L
Linus Torvalds 已提交
60 61

/* Module metadata and aliases. */
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");
L
Linus Torvalds 已提交
66

E
Eric Dumazet 已提交
67 68
/*
 * HASH_SIZE is the number of buckets in the tnls_r_l tunnel hash table;
 * HASH() reduces an address pair to HASH_SIZE_SHIFT bits to index it.
 */
#define HASH_SIZE_SHIFT  5
#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
L
Linus Torvalds 已提交
69

70 71 72 73
/*
 * Module parameter (mode 0644): when true, ip6_tnl_rcv() logs a
 * rate-limited message for packets whose ECN decapsulation reports an error.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

E
Eric Dumazet 已提交
74 75 76 77 78 79
/*
 * HASH - bucket index for a pair of tunnel end-point addresses
 *
 * XORs the two per-address hashes and reduces the result to
 * HASH_SIZE_SHIFT bits.
 */
static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
	return hash_32(ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2),
		       HASH_SIZE_SHIFT);
}
L
Linus Torvalds 已提交
80

E
Eric Dumazet 已提交
81
static int ip6_tnl_dev_init(struct net_device *dev);
82
static void ip6_tnl_dev_setup(struct net_device *dev);
83
static struct rtnl_link_ops ip6_link_ops __read_mostly;
L
Linus Torvalds 已提交
84

85
static int ip6_tnl_net_id __read_mostly;
86
struct ip6_tnl_net {
87 88
	/* the IPv6 tunnel fallback device */
	struct net_device *fb_tnl_dev;
89
	/* lists for storing tunnels in use */
E
Eric Dumazet 已提交
90 91 92
	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
	struct ip6_tnl __rcu *tnls_wc[1];
	struct ip6_tnl __rcu **tnls[2];
93 94
};

E
Eric Dumazet 已提交
95 96
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
97
	struct pcpu_sw_netstats tmp, sum = { 0 };
E
Eric Dumazet 已提交
98 99 100
	int i;

	for_each_possible_cpu(i) {
101
		unsigned int start;
102 103
		const struct pcpu_sw_netstats *tstats =
						   per_cpu_ptr(dev->tstats, i);
E
Eric Dumazet 已提交
104

105
		do {
106
			start = u64_stats_fetch_begin_irq(&tstats->syncp);
107 108 109 110
			tmp.rx_packets = tstats->rx_packets;
			tmp.rx_bytes = tstats->rx_bytes;
			tmp.tx_packets = tstats->tx_packets;
			tmp.tx_bytes =  tstats->tx_bytes;
111
		} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
112 113 114 115 116

		sum.rx_packets += tmp.rx_packets;
		sum.rx_bytes   += tmp.rx_bytes;
		sum.tx_packets += tmp.tx_packets;
		sum.tx_bytes   += tmp.tx_bytes;
E
Eric Dumazet 已提交
117 118 119 120 121 122 123 124
	}
	dev->stats.rx_packets = sum.rx_packets;
	dev->stats.rx_bytes   = sum.rx_bytes;
	dev->stats.tx_packets = sum.tx_packets;
	dev->stats.tx_bytes   = sum.tx_bytes;
	return &dev->stats;
}

125
/*
 * Locking : hash tables are protected by RCU and RTNL
 */
L
Linus Torvalds 已提交
128

129 130
static void __ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
				      struct dst_entry *dst)
L
Linus Torvalds 已提交
131
{
132 133 134 135 136 137
	dst_release(idst->dst);
	if (dst) {
		dst_hold(dst);
		idst->cookie = rt6_get_cookie((struct rt6_info *)dst);
	} else {
		idst->cookie = 0;
L
Linus Torvalds 已提交
138
	}
139 140 141 142 143 144
	idst->dst = dst;
}

/*
 * ip6_tnl_per_cpu_dst_set - locked wrapper around
 * __ip6_tnl_per_cpu_dst_set()
 *   @idst: per-CPU cache slot to update
 *   @dst: new route to cache, or NULL to clear the slot
 *
 * Takes @idst->lock with bottom halves disabled around the update.
 */
static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
				    struct dst_entry *dst)
{
	spin_lock_bh(&idst->lock);
	__ip6_tnl_per_cpu_dst_set(idst, dst);
	spin_unlock_bh(&idst->lock);
}

/*
 * ip6_tnl_dst_get - fetch the route cached for the current CPU
 *   @t: tunnel whose dst cache is consulted
 *
 * A cached dst is handed out when it is not marked obsolete or when its
 * ->check() callback still accepts the stored cookie; otherwise the slot
 * is cleared.
 *
 * Return: the cached dst with an extra reference held for the caller,
 * or NULL when the slot is empty or the entry was dropped as stale.
 */
struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t)
{
	struct ip6_tnl_dst *idst;
	struct dst_entry *dst;

	idst = raw_cpu_ptr(t->dst_cache);
	spin_lock_bh(&idst->lock);
	dst = idst->dst;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, idst->cookie)) {
			dst_hold(idst->dst);
		} else {
			__ip6_tnl_per_cpu_dst_set(idst, NULL);
			dst = NULL;
		}
	}
	spin_unlock_bh(&idst->lock);
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_get);
L
Linus Torvalds 已提交
171

X
xeb@mail.ru 已提交
172
void ip6_tnl_dst_reset(struct ip6_tnl *t)
L
Linus Torvalds 已提交
173
{
174 175 176 177
	int i;

	for_each_possible_cpu(i)
		ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL);
L
Linus Torvalds 已提交
178
}
X
xeb@mail.ru 已提交
179
EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
L
Linus Torvalds 已提交
180

181
void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst)
L
Linus Torvalds 已提交
182
{
183 184
	ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst);

L
Linus Torvalds 已提交
185
}
186
EXPORT_SYMBOL_GPL(ip6_tnl_dst_set);
L
Linus Torvalds 已提交
187

188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
void ip6_tnl_dst_destroy(struct ip6_tnl *t)
{
	if (!t->dst_cache)
		return;

	ip6_tnl_dst_reset(t);
	free_percpu(t->dst_cache);
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy);

/*
 * ip6_tnl_dst_init - allocate the per-CPU dst cache of a tunnel
 *   @t: tunnel to set up
 *
 * Initialises the spinlock of every possible CPU's slot.
 *
 * Return: 0 on success, -ENOMEM when the per-CPU allocation fails.
 */
int ip6_tnl_dst_init(struct ip6_tnl *t)
{
	int i;

	t->dst_cache = alloc_percpu(struct ip6_tnl_dst);
	if (t->dst_cache) {
		for_each_possible_cpu(i)
			spin_lock_init(&per_cpu_ptr(t->dst_cache, i)->lock);
		return 0;
	}

	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_init);

L
Linus Torvalds 已提交
213
/**
214
 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
215 216
 *   @remote: the address of the tunnel exit-point
 *   @local: the address of the tunnel entry-point
L
Linus Torvalds 已提交
217
 *
218
 * Return:
L
Linus Torvalds 已提交
219
 *   tunnel matching given end-points if found,
220
 *   else fallback tunnel if its device is up,
L
Linus Torvalds 已提交
221 222 223
 *   else %NULL
 **/

224 225 226
#define for_each_ip6_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

L
Linus Torvalds 已提交
227
static struct ip6_tnl *
228
ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
L
Linus Torvalds 已提交
229
{
E
Eric Dumazet 已提交
230
	unsigned int hash = HASH(remote, local);
L
Linus Torvalds 已提交
231
	struct ip6_tnl *t;
232
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
233
	struct in6_addr any;
L
Linus Torvalds 已提交
234

E
Eric Dumazet 已提交
235
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
L
Linus Torvalds 已提交
236 237 238 239 240
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256

	memset(&any, 0, sizeof(any));
	hash = HASH(&any, local);
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}

	hash = HASH(remote, &any);
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (ipv6_addr_equal(remote, &t->parms.raddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}

257 258
	t = rcu_dereference(ip6n->tnls_wc[0]);
	if (t && (t->dev->flags & IFF_UP))
L
Linus Torvalds 已提交
259 260 261 262 263 264
		return t;

	return NULL;
}

/**
265
 * ip6_tnl_bucket - get head of list matching given tunnel parameters
266
 *   @p: parameters containing tunnel end-points
L
Linus Torvalds 已提交
267 268
 *
 * Description:
269
 *   ip6_tnl_bucket() returns the head of the list matching the
L
Linus Torvalds 已提交
270 271
 *   &struct in6_addr entries laddr and raddr in @p.
 *
272
 * Return: head of IPv6 tunnel list
L
Linus Torvalds 已提交
273 274
 **/

E
Eric Dumazet 已提交
275
static struct ip6_tnl __rcu **
X
xeb@mail.ru 已提交
276
ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
L
Linus Torvalds 已提交
277
{
278 279
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
280
	unsigned int h = 0;
L
Linus Torvalds 已提交
281 282 283 284
	int prio = 0;

	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
		prio = 1;
E
Eric Dumazet 已提交
285
		h = HASH(remote, local);
L
Linus Torvalds 已提交
286
	}
287
	return &ip6n->tnls[prio][h];
L
Linus Torvalds 已提交
288 289 290
}

/**
291
 * ip6_tnl_link - add tunnel to hash table
L
Linus Torvalds 已提交
292 293 294 295
 *   @t: tunnel to be added
 **/

static void
296
ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
L
Linus Torvalds 已提交
297
{
E
Eric Dumazet 已提交
298
	struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
L
Linus Torvalds 已提交
299

300 301
	rcu_assign_pointer(t->next , rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
L
Linus Torvalds 已提交
302 303 304
}

/**
305
 * ip6_tnl_unlink - remove tunnel from hash table
L
Linus Torvalds 已提交
306 307 308 309
 *   @t: tunnel to be removed
 **/

static void
310
ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
L
Linus Torvalds 已提交
311
{
E
Eric Dumazet 已提交
312 313 314 315 316 317 318
	struct ip6_tnl __rcu **tp;
	struct ip6_tnl *iter;

	for (tp = ip6_tnl_bucket(ip6n, &t->parms);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
319
			rcu_assign_pointer(*tp, t->next);
L
Linus Torvalds 已提交
320 321 322 323 324
			break;
		}
	}
}

E
Eric Dumazet 已提交
325 326
static void ip6_dev_free(struct net_device *dev)
{
327 328 329
	struct ip6_tnl *t = netdev_priv(dev);

	ip6_tnl_dst_destroy(t);
E
Eric Dumazet 已提交
330 331 332 333
	free_percpu(dev->tstats);
	free_netdev(dev);
}

334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357
/*
 * ip6_tnl_create2 - register a prepared tunnel device and hash it
 *   @dev: allocated tunnel device whose private area is a struct ip6_tnl
 *
 * After a successful register_netdevice() the final device name is
 * copied into the tunnel parameters, the rtnl link ops are attached,
 * a device reference is taken and the tunnel is linked into its bucket.
 *
 * Return: 0 on success, else the negative error from register_netdevice().
 */
static int ip6_tnl_create2(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct ip6_tnl_net *ip6n = net_generic(dev_net(dev), ip6_tnl_net_id);
	int err = register_netdevice(dev);

	if (err < 0)
		return err;

	strcpy(t->parms.name, dev->name);
	dev->rtnl_link_ops = &ip6_link_ops;

	dev_hold(dev);
	ip6_tnl_link(ip6n, t);
	return 0;
}

L
Linus Torvalds 已提交
358
/**
359
 * ip6_tnl_create - create a new tunnel
L
Linus Torvalds 已提交
360 361 362 363 364
 *   @p: tunnel parameters
 *   @pt: pointer to new tunnel
 *
 * Description:
 *   Create tunnel matching given parameters.
365 366
 *
 * Return:
367
 *   created tunnel or error pointer
L
Linus Torvalds 已提交
368 369
 **/

X
xeb@mail.ru 已提交
370
static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
L
Linus Torvalds 已提交
371 372 373 374
{
	struct net_device *dev;
	struct ip6_tnl *t;
	char name[IFNAMSIZ];
375
	int err = -ENOMEM;
L
Linus Torvalds 已提交
376

377
	if (p->name[0])
L
Linus Torvalds 已提交
378
		strlcpy(name, p->name, IFNAMSIZ);
379 380 381
	else
		sprintf(name, "ip6tnl%%d");

382 383
	dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
			   ip6_tnl_dev_setup);
384
	if (!dev)
385
		goto failed;
L
Linus Torvalds 已提交
386

387 388
	dev_net_set(dev, net);

389
	t = netdev_priv(dev);
L
Linus Torvalds 已提交
390
	t->parms = *p;
N
Nicolas Dichtel 已提交
391
	t->net = dev_net(dev);
392
	err = ip6_tnl_create2(dev);
E
Eric Dumazet 已提交
393 394
	if (err < 0)
		goto failed_free;
L
Linus Torvalds 已提交
395

396
	return t;
397 398

failed_free:
E
Eric Dumazet 已提交
399
	ip6_dev_free(dev);
400
failed:
401
	return ERR_PTR(err);
L
Linus Torvalds 已提交
402 403 404
}

/**
405
 * ip6_tnl_locate - find or create tunnel matching given parameters
406
 *   @p: tunnel parameters
L
Linus Torvalds 已提交
407 408 409
 *   @create: != 0 if allowed to create new tunnel if no match found
 *
 * Description:
410
 *   ip6_tnl_locate() first tries to locate an existing tunnel
L
Linus Torvalds 已提交
411 412 413 414
 *   based on @parms. If this is unsuccessful, but @create is set a new
 *   tunnel device is created and registered for use.
 *
 * Return:
415
 *   matching tunnel or error pointer
L
Linus Torvalds 已提交
416 417
 **/

418
static struct ip6_tnl *ip6_tnl_locate(struct net *net,
X
xeb@mail.ru 已提交
419
		struct __ip6_tnl_parm *p, int create)
L
Linus Torvalds 已提交
420
{
421 422
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
E
Eric Dumazet 已提交
423
	struct ip6_tnl __rcu **tp;
L
Linus Torvalds 已提交
424
	struct ip6_tnl *t;
425
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
L
Linus Torvalds 已提交
426

E
Eric Dumazet 已提交
427 428 429
	for (tp = ip6_tnl_bucket(ip6n, p);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
L
Linus Torvalds 已提交
430
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
431 432
		    ipv6_addr_equal(remote, &t->parms.raddr)) {
			if (create)
433
				return ERR_PTR(-EEXIST);
434

435
			return t;
436
		}
L
Linus Torvalds 已提交
437 438
	}
	if (!create)
439
		return ERR_PTR(-ENODEV);
440
	return ip6_tnl_create(net, p);
L
Linus Torvalds 已提交
441 442 443
}

/**
444
 * ip6_tnl_dev_uninit - tunnel device uninitializer
L
Linus Torvalds 已提交
445
 *   @dev: the device to be destroyed
446
 *
L
Linus Torvalds 已提交
447
 * Description:
448
 *   ip6_tnl_dev_uninit() removes tunnel from its list
L
Linus Torvalds 已提交
449 450 451
 **/

static void
452
ip6_tnl_dev_uninit(struct net_device *dev)
L
Linus Torvalds 已提交
453
{
454
	struct ip6_tnl *t = netdev_priv(dev);
N
Nicolas Dichtel 已提交
455
	struct net *net = t->net;
456
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
L
Linus Torvalds 已提交
457

E
Eric Dumazet 已提交
458
	if (dev == ip6n->fb_tnl_dev)
459
		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
E
Eric Dumazet 已提交
460
	else
461
		ip6_tnl_unlink(ip6n, t);
L
Linus Torvalds 已提交
462 463 464 465 466 467 468 469
	ip6_tnl_dst_reset(t);
	dev_put(dev);
}

/**
 * parse_tvl_tnl_enc_lim - handle encapsulation limit option
 *   @skb: received socket buffer
 *
470 471
 * Return:
 *   0 if none was found,
L
Linus Torvalds 已提交
472 473 474
 *   else index to encapsulation limit
 **/

X
xeb@mail.ru 已提交
475
__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
L
Linus Torvalds 已提交
476
{
477
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
L
Linus Torvalds 已提交
478
	__u8 nexthdr = ipv6h->nexthdr;
479
	__u16 off = sizeof(*ipv6h);
L
Linus Torvalds 已提交
480 481 482 483

	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
		__u16 optlen = 0;
		struct ipv6_opt_hdr *hdr;
484
		if (raw + off + sizeof(*hdr) > skb->data &&
L
Linus Torvalds 已提交
485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524
		    !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
			break;

		hdr = (struct ipv6_opt_hdr *) (raw + off);
		if (nexthdr == NEXTHDR_FRAGMENT) {
			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
			if (frag_hdr->frag_off)
				break;
			optlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH) {
			optlen = (hdr->hdrlen + 2) << 2;
		} else {
			optlen = ipv6_optlen(hdr);
		}
		if (nexthdr == NEXTHDR_DEST) {
			__u16 i = off + 2;
			while (1) {
				struct ipv6_tlv_tnl_enc_lim *tel;

				/* No more room for encapsulation limit */
				if (i + sizeof (*tel) > off + optlen)
					break;

				tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
				/* return index of option if found and valid */
				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
				    tel->length == 1)
					return i;
				/* else jump to next option */
				if (tel->type)
					i += tel->length + 2;
				else
					i++;
			}
		}
		nexthdr = hdr->nexthdr;
		off += optlen;
	}
	return 0;
}
X
xeb@mail.ru 已提交
525
EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
L
Linus Torvalds 已提交
526 527

/**
528
 * ip6_tnl_err - tunnel error handler
L
Linus Torvalds 已提交
529 530
 *
 * Description:
531
 *   ip6_tnl_err() should handle errors in the tunnel according
L
Linus Torvalds 已提交
532 533 534
 *   to the specifications in RFC 2473.
 **/

H
Herbert Xu 已提交
535
static int
536
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
537
	    u8 *type, u8 *code, int *msg, __u32 *info, int offset)
L
Linus Torvalds 已提交
538
{
539
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
L
Linus Torvalds 已提交
540 541
	struct ip6_tnl *t;
	int rel_msg = 0;
542 543
	u8 rel_type = ICMPV6_DEST_UNREACH;
	u8 rel_code = ICMPV6_ADDR_UNREACH;
544
	u8 tproto;
L
Linus Torvalds 已提交
545 546
	__u32 rel_info = 0;
	__u16 len;
H
Herbert Xu 已提交
547
	int err = -ENOENT;
L
Linus Torvalds 已提交
548

549 550
	/* If the packet doesn't contain the original IPv6 header we are
	   in trouble since we might need the source address for further
L
Linus Torvalds 已提交
551 552
	   processing of the error. */

553
	rcu_read_lock();
554
	t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr);
555
	if (!t)
L
Linus Torvalds 已提交
556 557
		goto out;

558 559
	tproto = ACCESS_ONCE(t->parms.proto);
	if (tproto != ipproto && tproto != 0)
560 561
		goto out;

H
Herbert Xu 已提交
562 563
	err = 0;

564
	switch (*type) {
L
Linus Torvalds 已提交
565 566 567 568
		__u32 teli;
		struct ipv6_tlv_tnl_enc_lim *tel;
		__u32 mtu;
	case ICMPV6_DEST_UNREACH:
569 570
		net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
				     t->parms.name);
L
Linus Torvalds 已提交
571 572 573
		rel_msg = 1;
		break;
	case ICMPV6_TIME_EXCEED:
574
		if ((*code) == ICMPV6_EXC_HOPLIMIT) {
575 576
			net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
					     t->parms.name);
L
Linus Torvalds 已提交
577 578 579 580
			rel_msg = 1;
		}
		break;
	case ICMPV6_PARAMPROB:
581
		teli = 0;
582
		if ((*code) == ICMPV6_HDR_FIELD)
X
xeb@mail.ru 已提交
583
			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
L
Linus Torvalds 已提交
584

A
Al Viro 已提交
585
		if (teli && teli == *info - 2) {
L
Linus Torvalds 已提交
586 587
			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
			if (tel->encap_limit == 0) {
588 589
				net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
						     t->parms.name);
L
Linus Torvalds 已提交
590 591
				rel_msg = 1;
			}
592 593 594
		} else {
			net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
					     t->parms.name);
L
Linus Torvalds 已提交
595 596 597
		}
		break;
	case ICMPV6_PKT_TOOBIG:
A
Al Viro 已提交
598
		mtu = *info - offset;
L
Linus Torvalds 已提交
599 600 601 602
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
		t->dev->mtu = mtu;

603 604
		len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
		if (len > mtu) {
L
Linus Torvalds 已提交
605 606 607 608 609 610 611
			rel_type = ICMPV6_PKT_TOOBIG;
			rel_code = 0;
			rel_info = mtu;
			rel_msg = 1;
		}
		break;
	}
612 613 614 615 616 617 618

	*type = rel_type;
	*code = rel_code;
	*info = rel_info;
	*msg = rel_msg;

out:
619
	rcu_read_unlock();
620 621 622
	return err;
}

623 624
/*
 * ip4ip6_err - ICMPv6 error handler for IPv4-in-IPv6 tunnels
 *   @skb: received ICMPv6 error packet
 *   @opt: passed through to ip6_tnl_err()
 *   @type: ICMPv6 type of the error
 *   @code: ICMPv6 code of the error
 *   @offset: offset of the inner (IPv4) header in @skb's data
 *   @info: ICMPv6 info field, network byte order
 *
 * Lets ip6_tnl_err() decide whether the error must be relayed, maps the
 * resulting ICMPv6 type/code to an ICMPv4 one and sends that ICMPv4
 * error for a clone of @skb pulled to the inner IPv4 header.
 *
 * Return: negative error from ip6_tnl_err(), otherwise 0.
 */
static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
{
	int rel_msg = 0;
	u8 rel_type = type;
	u8 rel_code = code;
	__u32 rel_info = ntohl(info);
	int err;
	struct sk_buff *skb2;
	const struct iphdr *eiph;
	struct rtable *rt;
	struct flowi4 fl4;

	err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (rel_msg == 0)
		return 0;

	switch (rel_type) {
	case ICMPV6_DEST_UNREACH:
		if (rel_code != ICMPV6_ADDR_UNREACH)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_HOST_UNREACH;
		break;
	case ICMPV6_PKT_TOOBIG:
		if (rel_code != 0)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_FRAG_NEEDED;
		break;
	case NDISC_REDIRECT:
		rel_type = ICMP_REDIRECT;
		rel_code = ICMP_REDIR_HOST;
		/*
		 * Falls through to the default return.
		 * NOTE(review): this makes the ICMP_REDIRECT handling
		 * further down unreachable - confirm this is intended.
		 */
	default:
		return 0;
	}

	if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
		return 0;

	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (!skb2)
		return 0;

	skb_dst_drop(skb2);

	skb_pull(skb2, offset);
	skb_reset_network_header(skb2);
	eiph = ip_hdr(skb2);

	/* Try to guess incoming interface */
	rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
				   eiph->saddr, 0,
				   0, 0,
				   IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
	if (IS_ERR(rt))
		goto out;

	skb2->dev = rt->dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags & RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
					   eiph->daddr, eiph->saddr,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(eiph->tos), 0);
		if (IS_ERR(rt) ||
		    rt->dst.dev->type != ARPHRD_TUNNEL) {
			if (!IS_ERR(rt))
				ip_rt_put(rt);
			goto out;
		}
		skb_dst_set(skb2, &rt->dst);
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
				   skb2->dev) ||
		    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
			goto out;
	}

	/* change mtu on this route */
	if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
		if (rel_info > dst_mtu(skb_dst(skb2)))
			goto out;

		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
	}
	if (rel_type == ICMP_REDIRECT)
		skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);

	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));

out:
	kfree_skb(skb2);
	return 0;
}

729 730
/*
 * ip6ip6_err - ICMPv6 error handler for IPv6-in-IPv6 tunnels
 *   @skb: received ICMPv6 error packet
 *   @opt: passed through to ip6_tnl_err()
 *   @type: ICMPv6 type of the error
 *   @code: ICMPv6 code of the error
 *   @offset: offset of the inner IPv6 header in @skb's data
 *   @info: ICMPv6 info field, network byte order
 *
 * When ip6_tnl_err() asks for the error to be relayed and the inner
 * IPv6 header can be pulled, an ICMPv6 error is sent for a clone of
 * @skb pulled to the inner header.
 *
 * Return: negative error from ip6_tnl_err(), otherwise 0.
 */
static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
{
	int rel_msg = 0;
	u8 rel_type = type;
	u8 rel_code = code;
	__u32 rel_info = ntohl(info);
	int err;

	err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
		struct rt6_info *rt;
		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

		if (!skb2)
			return 0;

		skb_dst_drop(skb2);
		skb_pull(skb2, offset);
		skb_reset_network_header(skb2);

		/* Try to guess incoming interface */
		rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
				NULL, 0, 0);

		if (rt && rt->dst.dev)
			skb2->dev = rt->dst.dev;

		icmpv6_send(skb2, rel_type, rel_code, rel_info);

		ip6_rt_put(rt);

		kfree_skb(skb2);
	}

	return 0;
}

772 773 774
static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
				       const struct ipv6hdr *ipv6h,
				       struct sk_buff *skb)
775 776 777 778
{
	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;

	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
779
		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
780

781
	return IP6_ECN_decapsulate(ipv6h, skb);
782 783
}

784 785 786
static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
				       const struct ipv6hdr *ipv6h,
				       struct sk_buff *skb)
L
Linus Torvalds 已提交
787
{
788
	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
789
		ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
L
Linus Torvalds 已提交
790

791
	return IP6_ECN_decapsulate(ipv6h, skb);
L
Linus Torvalds 已提交
792
}
793

X
xeb@mail.ru 已提交
794
__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
795 796 797
			     const struct in6_addr *laddr,
			     const struct in6_addr *raddr)
{
X
xeb@mail.ru 已提交
798
	struct __ip6_tnl_parm *p = &t->parms;
799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815
	int ltype = ipv6_addr_type(laddr);
	int rtype = ipv6_addr_type(raddr);
	__u32 flags = 0;

	if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
		flags = IP6_TNL_F_CAP_PER_PACKET;
	} else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
		   (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
		if (ltype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_XMIT;
		if (rtype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_RCV;
	}
	return flags;
}
X
xeb@mail.ru 已提交
816
EXPORT_SYMBOL(ip6_tnl_get_cap);
817

E
Eric Dumazet 已提交
818
/* called with rcu_read_lock() */
X
xeb@mail.ru 已提交
819
int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
820 821
				  const struct in6_addr *laddr,
				  const struct in6_addr *raddr)
822
{
X
xeb@mail.ru 已提交
823
	struct __ip6_tnl_parm *p = &t->parms;
824
	int ret = 0;
N
Nicolas Dichtel 已提交
825
	struct net *net = t->net;
826

827 828 829
	if ((p->flags & IP6_TNL_F_CAP_RCV) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
830
		struct net_device *ldev = NULL;
831 832

		if (p->link)
E
Eric Dumazet 已提交
833
			ldev = dev_get_by_index_rcu(net, p->link);
834

835 836 837
		if ((ipv6_addr_is_multicast(laddr) ||
		     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
		    likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
838 839 840 841
			ret = 1;
	}
	return ret;
}
X
xeb@mail.ru 已提交
842
EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
L
Linus Torvalds 已提交
843 844

/**
845
 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
L
Linus Torvalds 已提交
846
 *   @skb: received socket buffer
847 848
 *   @protocol: ethernet protocol ID
 *   @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
L
Linus Torvalds 已提交
849 850 851 852
 *
 * Return: 0
 **/

853
static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
854
		       __u8 ipproto,
855 856 857
		       int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						   const struct ipv6hdr *ipv6h,
						   struct sk_buff *skb))
L
Linus Torvalds 已提交
858 859
{
	struct ip6_tnl *t;
860
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
861
	u8 tproto;
862
	int err;
L
Linus Torvalds 已提交
863

864
	rcu_read_lock();
865
	t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
866
	if (t) {
867
		struct pcpu_sw_netstats *tstats;
E
Eric Dumazet 已提交
868

869 870
		tproto = ACCESS_ONCE(t->parms.proto);
		if (tproto != ipproto && tproto != 0) {
871
			rcu_read_unlock();
872 873 874
			goto discard;
		}

L
Linus Torvalds 已提交
875
		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
876
			rcu_read_unlock();
877
			goto discard;
L
Linus Torvalds 已提交
878 879
		}

880
		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
881
			t->dev->stats.rx_dropped++;
882
			rcu_read_unlock();
L
Linus Torvalds 已提交
883 884
			goto discard;
		}
885
		skb->mac_header = skb->network_header;
886
		skb_reset_network_header(skb);
887
		skb->protocol = htons(protocol);
L
Linus Torvalds 已提交
888
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
889

890
		__skb_tunnel_rx(skb, t->dev, t->net);
891 892 893 894 895 896 897 898 899 900 901 902 903 904 905

		err = dscp_ecn_decapsulate(t, ipv6h, skb);
		if (unlikely(err)) {
			if (log_ecn_error)
				net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
						     &ipv6h->saddr,
						     ipv6_get_dsfield(ipv6h));
			if (err > 1) {
				++t->dev->stats.rx_frame_errors;
				++t->dev->stats.rx_errors;
				rcu_read_unlock();
				goto discard;
			}
		}

E
Eric Dumazet 已提交
906
		tstats = this_cpu_ptr(t->dev->tstats);
907
		u64_stats_update_begin(&tstats->syncp);
E
Eric Dumazet 已提交
908 909
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
910
		u64_stats_update_end(&tstats->syncp);
E
Eric Dumazet 已提交
911

912
		netif_rx(skb);
E
Eric Dumazet 已提交
913

914
		rcu_read_unlock();
L
Linus Torvalds 已提交
915 916
		return 0;
	}
917
	rcu_read_unlock();
L
Linus Torvalds 已提交
918
	return 1;
919 920 921 922

discard:
	kfree_skb(skb);
	return 0;
L
Linus Torvalds 已提交
923 924
}

925 926
static int ip4ip6_rcv(struct sk_buff *skb)
{
927 928
	return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
			   ip4ip6_dscp_ecn_decapsulate);
929 930
}

931 932
static int ip6ip6_rcv(struct sk_buff *skb)
{
933 934
	return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
			   ip6ip6_dscp_ecn_decapsulate);
935 936
}

937 938 939 940
/*
 * IPv6 tx options bundled with an 8-byte buffer for a destination
 * options header; init_tel_txopt() fills dst_opt and points
 * ops.dst0opt at it.
 */
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
};
L
Linus Torvalds 已提交
941

942 943 944
/*
 * init_tel_txopt - build tx options carrying a tunnel encapsulation limit
 *   @opt: option block to initialise; fully overwritten
 *   @encap_limit: value stored in the encapsulation limit option
 *
 * Lays out an 8-byte destination options header in @opt->dst_opt:
 * bytes 0-1 stay zero from the memset, bytes 2-4 hold the
 * encapsulation limit TLV, bytes 5-7 a PadN option with one data byte.
 */
static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
{
	memset(opt, 0, sizeof(struct ipv6_tel_txoption));

	opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
	opt->dst_opt[3] = 1;
	opt->dst_opt[4] = encap_limit;
	opt->dst_opt[5] = IPV6_TLV_PADN;
	opt->dst_opt[6] = 1;

	opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
	opt->ops.opt_nflen = 8;
}

/**
957
 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
L
Linus Torvalds 已提交
958
 *   @t: the outgoing tunnel device
959
 *   @hdr: IPv6 header from the incoming packet
L
Linus Torvalds 已提交
960 961
 *
 * Description:
962
 *   Avoid trivial tunneling loop by checking that tunnel exit-point
L
Linus Torvalds 已提交
963 964
 *   doesn't match source of incoming packet.
 *
965
 * Return:
L
Linus Torvalds 已提交
966 967 968 969
 *   1 if conflict,
 *   0 else
 **/

E
Eric Dumazet 已提交
970
static inline bool
971
ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
L
Linus Torvalds 已提交
972 973 974 975
{
	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}

976 977 978
int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
		     const struct in6_addr *laddr,
		     const struct in6_addr *raddr)
979
{
X
xeb@mail.ru 已提交
980
	struct __ip6_tnl_parm *p = &t->parms;
981
	int ret = 0;
N
Nicolas Dichtel 已提交
982
	struct net *net = t->net;
983

984 985 986
	if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
987 988
		struct net_device *ldev = NULL;

E
Eric Dumazet 已提交
989
		rcu_read_lock();
990
		if (p->link)
E
Eric Dumazet 已提交
991
			ldev = dev_get_by_index_rcu(net, p->link);
992

993
		if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
994 995
			pr_warn("%s xmit: Local address not yet configured!\n",
				p->name);
996 997
		else if (!ipv6_addr_is_multicast(raddr) &&
			 unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
998 999
			pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
				p->name);
1000 1001
		else
			ret = 1;
E
Eric Dumazet 已提交
1002
		rcu_read_unlock();
1003 1004 1005
	}
	return ret;
}
X
xeb@mail.ru 已提交
1006 1007
EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);

L
Linus Torvalds 已提交
1008
/**
1009
 * ip6_tnl_xmit2 - encapsulate packet and send
L
Linus Torvalds 已提交
1010
 *   @skb: the outgoing socket buffer
1011
 *   @dev: the outgoing tunnel device
1012 1013 1014 1015
 *   @dsfield: dscp code for outer header
 *   @fl: flow of tunneled packet
 *   @encap_limit: encapsulation limit
 *   @pmtu: Path MTU is stored if packet is too big
L
Linus Torvalds 已提交
1016 1017 1018 1019 1020
 *
 * Description:
 *   Build new header and do some sanity checks on the packet before sending
 *   it.
 *
1021
 * Return:
1022
 *   0 on success
1023 1024
 *   -1 fail
 *   %-EMSGSIZE message too big. return mtu in this case.
L
Linus Torvalds 已提交
1025 1026
 **/

1027 1028 1029
static int ip6_tnl_xmit2(struct sk_buff *skb,
			 struct net_device *dev,
			 __u8 dsfield,
1030
			 struct flowi6 *fl6,
1031 1032
			 int encap_limit,
			 __u32 *pmtu)
L
Linus Torvalds 已提交
1033
{
1034
	struct ip6_tnl *t = netdev_priv(dev);
N
Nicolas Dichtel 已提交
1035
	struct net *net = t->net;
1036
	struct net_device_stats *stats = &t->dev->stats;
1037
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1038
	struct ipv6_tel_txoption opt;
1039
	struct dst_entry *dst = NULL, *ndst = NULL;
L
Linus Torvalds 已提交
1040 1041
	struct net_device *tdev;
	int mtu;
1042
	unsigned int max_headroom = sizeof(struct ipv6hdr);
L
Linus Torvalds 已提交
1043
	u8 proto;
1044
	int err = -1;
L
Linus Torvalds 已提交
1045

1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068
	/* NBMA tunnel */
	if (ipv6_addr_any(&t->parms.raddr)) {
		struct in6_addr *addr6;
		struct neighbour *neigh;
		int addr_type;

		if (!skb_dst(skb))
			goto tx_err_link_failure;

		neigh = dst_neigh_lookup(skb_dst(skb),
					 &ipv6_hdr(skb)->daddr);
		if (!neigh)
			goto tx_err_link_failure;

		addr6 = (struct in6_addr *)&neigh->primary_key;
		addr_type = ipv6_addr_type(addr6);

		if (addr_type == IPV6_ADDR_ANY)
			addr6 = &ipv6_hdr(skb)->daddr;

		memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
		neigh_release(neigh);
	} else if (!fl6->flowi6_mark)
1069
		dst = ip6_tnl_dst_get(t);
1070 1071 1072 1073

	if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
		goto tx_err_link_failure;

1074
	if (!dst) {
1075
		dst = ip6_route_output(net, NULL, fl6);
L
Linus Torvalds 已提交
1076

1077
		if (dst->error)
1078
			goto tx_err_link_failure;
1079 1080 1081 1082
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			dst = NULL;
1083 1084
			goto tx_err_link_failure;
		}
1085
		ndst = dst;
1086
	}
L
Linus Torvalds 已提交
1087 1088 1089 1090 1091

	tdev = dst->dev;

	if (tdev == dev) {
		stats->collisions++;
1092 1093
		net_warn_ratelimited("%s: Local routing loop detected!\n",
				     t->parms.name);
L
Linus Torvalds 已提交
1094 1095
		goto tx_err_dst_release;
	}
1096
	mtu = dst_mtu(dst) - sizeof(*ipv6h);
1097
	if (encap_limit >= 0) {
L
Linus Torvalds 已提交
1098 1099 1100 1101 1102
		max_headroom += 8;
		mtu -= 8;
	}
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
E
Eric Dumazet 已提交
1103
	if (skb_dst(skb))
1104
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
L
Linus Torvalds 已提交
1105
	if (skb->len > mtu) {
1106 1107
		*pmtu = mtu;
		err = -EMSGSIZE;
L
Linus Torvalds 已提交
1108 1109 1110
		goto tx_err_dst_release;
	}

1111
	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
N
Nicolas Dichtel 已提交
1112

L
Linus Torvalds 已提交
1113 1114 1115 1116
	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom += LL_RESERVED_SPACE(tdev);
1117

1118 1119
	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
L
Linus Torvalds 已提交
1120
		struct sk_buff *new_skb;
1121

1122 1123
		new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb)
L
Linus Torvalds 已提交
1124 1125 1126 1127
			goto tx_err_dst_release;

		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
1128
		consume_skb(skb);
L
Linus Torvalds 已提交
1129 1130
		skb = new_skb;
	}
1131 1132 1133 1134 1135

	if (!fl6->flowi6_mark && ndst)
		ip6_tnl_dst_set(t, ndst);
	skb_dst_set(skb, dst);

1136
	skb->transport_header = skb->network_header;
L
Linus Torvalds 已提交
1137

1138
	proto = fl6->flowi6_proto;
1139 1140 1141 1142
	if (encap_limit >= 0) {
		init_tel_txopt(&opt, encap_limit);
		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
	}
1143 1144 1145 1146 1147 1148

	if (likely(!skb->encapsulation)) {
		skb_reset_inner_headers(skb);
		skb->encapsulation = 1;
	}

1149 1150
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
1151
	ipv6h = ipv6_hdr(skb);
1152
	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
1153
		     ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
L
Linus Torvalds 已提交
1154 1155
	ipv6h->hop_limit = t->parms.hop_limit;
	ipv6h->nexthdr = proto;
A
Alexey Dobriyan 已提交
1156 1157
	ipv6h->saddr = fl6->saddr;
	ipv6h->daddr = fl6->daddr;
1158
	ip6tunnel_xmit(NULL, skb, dev);
L
Linus Torvalds 已提交
1159 1160 1161 1162 1163
	return 0;
tx_err_link_failure:
	stats->tx_carrier_errors++;
	dst_link_failure(skb);
tx_err_dst_release:
1164
	dst_release(dst);
1165 1166 1167
	return err;
}

1168 1169 1170 1171
static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
1172
	const struct iphdr  *iph = ip_hdr(skb);
1173
	int encap_limit = -1;
1174
	struct flowi6 fl6;
1175 1176
	__u8 dsfield;
	__u32 mtu;
1177
	u8 tproto;
1178 1179
	int err;

1180
	tproto = ACCESS_ONCE(t->parms.proto);
1181
	if (tproto != IPPROTO_IPIP && tproto != 0)
1182 1183 1184 1185 1186
		return -1;

	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		encap_limit = t->parms.encap_limit;

1187
	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
1188
	fl6.flowi6_proto = IPPROTO_IPIP;
1189 1190 1191

	dsfield = ipv4_get_dsfield(iph);

1192
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
1193
		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
A
Al Viro 已提交
1194
					  & IPV6_TCLASS_MASK;
1195 1196
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
		fl6.flowi6_mark = skb->mark;
1197

1198
	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209
	if (err != 0) {
		/* XXX: send ICMP error even if DF is not set. */
		if (err == -EMSGSIZE)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
		return -1;
	}

	return 0;
}

1210 1211 1212 1213
static inline int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
1214
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1215 1216
	int encap_limit = -1;
	__u16 offset;
1217
	struct flowi6 fl6;
1218 1219
	__u8 dsfield;
	__u32 mtu;
1220
	u8 tproto;
1221 1222
	int err;

1223 1224
	tproto = ACCESS_ONCE(t->parms.proto);
	if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
1225
	    ip6_tnl_addr_conflict(t, ipv6h))
1226 1227
		return -1;

X
xeb@mail.ru 已提交
1228
	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
1229
	if (offset > 0) {
1230
		struct ipv6_tlv_tnl_enc_lim *tel;
1231
		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
1232 1233
		if (tel->encap_limit == 0) {
			icmpv6_send(skb, ICMPV6_PARAMPROB,
1234
				    ICMPV6_HDR_FIELD, offset + 2);
1235 1236 1237 1238 1239 1240
			return -1;
		}
		encap_limit = tel->encap_limit - 1;
	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		encap_limit = t->parms.encap_limit;

1241
	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
1242
	fl6.flowi6_proto = IPPROTO_IPV6;
1243 1244

	dsfield = ipv6_get_dsfield(ipv6h);
1245
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
1246
		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
1247
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
F
Florent Fourcot 已提交
1248
		fl6.flowlabel |= ip6_flowlabel(ipv6h);
1249 1250
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
		fl6.flowi6_mark = skb->mark;
1251

1252
	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
1253 1254
	if (err != 0) {
		if (err == -EMSGSIZE)
1255
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1256 1257 1258 1259 1260 1261
		return -1;
	}

	return 0;
}

1262
static netdev_tx_t
1263 1264 1265
ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
1266
	struct net_device_stats *stats = &t->dev->stats;
1267 1268 1269
	int ret;

	switch (skb->protocol) {
1270
	case htons(ETH_P_IP):
1271 1272
		ret = ip4ip6_tnl_xmit(skb, dev);
		break;
1273
	case htons(ETH_P_IPV6):
1274 1275 1276 1277 1278 1279 1280 1281 1282
		ret = ip6ip6_tnl_xmit(skb, dev);
		break;
	default:
		goto tx_err;
	}

	if (ret < 0)
		goto tx_err;

1283
	return NETDEV_TX_OK;
1284

L
Linus Torvalds 已提交
1285 1286 1287 1288
tx_err:
	stats->tx_errors++;
	stats->tx_dropped++;
	kfree_skb(skb);
1289
	return NETDEV_TX_OK;
L
Linus Torvalds 已提交
1290 1291
}

1292
static void ip6_tnl_link_config(struct ip6_tnl *t)
L
Linus Torvalds 已提交
1293 1294
{
	struct net_device *dev = t->dev;
X
xeb@mail.ru 已提交
1295
	struct __ip6_tnl_parm *p = &t->parms;
1296
	struct flowi6 *fl6 = &t->fl.u.ip6;
L
Linus Torvalds 已提交
1297

1298 1299
	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
L
Linus Torvalds 已提交
1300 1301

	/* Set up flowi template */
A
Alexey Dobriyan 已提交
1302 1303
	fl6->saddr = p->laddr;
	fl6->daddr = p->raddr;
1304 1305
	fl6->flowi6_oif = p->link;
	fl6->flowlabel = 0;
L
Linus Torvalds 已提交
1306 1307

	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1308
		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
L
Linus Torvalds 已提交
1309
	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1310
		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
L
Linus Torvalds 已提交
1311

1312 1313
	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
L
Linus Torvalds 已提交
1314 1315 1316 1317 1318 1319 1320

	if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
		dev->flags |= IFF_POINTOPOINT;
	else
		dev->flags &= ~IFF_POINTOPOINT;

	if (p->flags & IP6_TNL_F_CAP_XMIT) {
1321 1322 1323
		int strict = (ipv6_addr_type(&p->raddr) &
			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));

N
Nicolas Dichtel 已提交
1324
		struct rt6_info *rt = rt6_lookup(t->net,
1325
						 &p->raddr, &p->laddr,
1326
						 p->link, strict);
L
Linus Torvalds 已提交
1327

1328
		if (!rt)
L
Linus Torvalds 已提交
1329 1330
			return;

1331 1332
		if (rt->dst.dev) {
			dev->hard_header_len = rt->dst.dev->hard_header_len +
1333
				sizeof(struct ipv6hdr);
L
Linus Torvalds 已提交
1334

1335
			dev->mtu = rt->dst.dev->mtu - sizeof(struct ipv6hdr);
1336
			if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1337
				dev->mtu -= 8;
L
Linus Torvalds 已提交
1338 1339 1340 1341

			if (dev->mtu < IPV6_MIN_MTU)
				dev->mtu = IPV6_MIN_MTU;
		}
A
Amerigo Wang 已提交
1342
		ip6_rt_put(rt);
L
Linus Torvalds 已提交
1343 1344 1345 1346
	}
}

/**
1347
 * ip6_tnl_change - update the tunnel parameters
L
Linus Torvalds 已提交
1348 1349 1350 1351
 *   @t: tunnel to be changed
 *   @p: tunnel configuration parameters
 *
 * Description:
1352
 *   ip6_tnl_change() updates the tunnel parameters
L
Linus Torvalds 已提交
1353 1354 1355
 **/

static int
X
xeb@mail.ru 已提交
1356
ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
L
Linus Torvalds 已提交
1357
{
A
Alexey Dobriyan 已提交
1358 1359
	t->parms.laddr = p->laddr;
	t->parms.raddr = p->raddr;
L
Linus Torvalds 已提交
1360 1361 1362 1363
	t->parms.flags = p->flags;
	t->parms.hop_limit = p->hop_limit;
	t->parms.encap_limit = p->encap_limit;
	t->parms.flowinfo = p->flowinfo;
1364
	t->parms.link = p->link;
1365
	t->parms.proto = p->proto;
1366
	ip6_tnl_dst_reset(t);
1367
	ip6_tnl_link_config(t);
L
Linus Torvalds 已提交
1368 1369 1370
	return 0;
}

1371 1372
static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
N
Nicolas Dichtel 已提交
1373
	struct net *net = t->net;
1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	int err;

	ip6_tnl_unlink(ip6n, t);
	synchronize_net();
	err = ip6_tnl_change(t, p);
	ip6_tnl_link(ip6n, t);
	netdev_state_change(t->dev);
	return err;
}

1385 1386 1387 1388 1389 1390 1391 1392
/* Update path for the default tnl0 device: only the proto may be changed,
 * every other field of @p is ignored.  Always returns 0.
 */
static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
	struct net_device *tnl_dev = t->dev;

	t->parms.proto = p->proto;
	netdev_state_change(tnl_dev);

	return 0;
}

X
xeb@mail.ru 已提交
1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420
/* Convert the userspace ioctl parameter layout (@u) into the in-kernel
 * parameter structure (@p), field by field.
 *
 * @u is filled by copy_from_user() in the ioctl handler, so its name is
 * not guaranteed to be NUL-terminated.  The copy is terminated here so
 * that later users which treat the name as a C string cannot run past the
 * end of the buffer.
 */
static void
ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
{
	p->laddr = u->laddr;
	p->raddr = u->raddr;
	p->flags = u->flags;
	p->hop_limit = u->hop_limit;
	p->encap_limit = u->encap_limit;
	p->flowinfo = u->flowinfo;
	p->link = u->link;
	p->proto = u->proto;
	memcpy(p->name, u->name, sizeof(u->name));
	/* Untrusted input: force termination of the copied name. */
	p->name[sizeof(u->name) - 1] = '\0';
}

/* Convert the in-kernel parameter structure (@p) into the layout handed
 * back to userspace by the ioctl handler (@u), field by field.
 */
static void
ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
{
	/* Endpoints and outgoing link. */
	u->laddr = p->laddr;
	u->raddr = p->raddr;
	u->link = p->link;

	/* Encapsulation behaviour. */
	u->proto = p->proto;
	u->flags = p->flags;
	u->hop_limit = p->hop_limit;
	u->encap_limit = p->encap_limit;
	u->flowinfo = p->flowinfo;

	memcpy(u->name, p->name, sizeof(u->name));
}

L
Linus Torvalds 已提交
1421
/**
1422
 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
L
Linus Torvalds 已提交
1423 1424 1425 1426 1427
 *   @dev: virtual device associated with tunnel
 *   @ifr: parameters passed from userspace
 *   @cmd: command to be performed
 *
 * Description:
1428
 *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
1429
 *   from userspace.
L
Linus Torvalds 已提交
1430 1431 1432 1433 1434 1435 1436
 *
 *   The possible commands are the following:
 *     %SIOCGETTUNNEL: get tunnel parameters for device
 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
 *     %SIOCDELTUNNEL: delete tunnel
 *
1437
 *   The fallback device "ip6tnl0", created during module
L
Linus Torvalds 已提交
1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449
 *   initialization, can be used for creating other tunnel devices.
 *
 * Return:
 *   0 on success,
 *   %-EFAULT if unable to copy data to or from userspace,
 *   %-EPERM if current process hasn't %CAP_NET_ADMIN set
 *   %-EINVAL if passed tunnel parameters are invalid,
 *   %-EEXIST if changing a tunnel's parameters would cause a conflict
 *   %-ENODEV if attempting to change or delete a nonexisting device
 **/

static int
1450
ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
L
Linus Torvalds 已提交
1451 1452 1453
{
	int err = 0;
	struct ip6_tnl_parm p;
X
xeb@mail.ru 已提交
1454
	struct __ip6_tnl_parm p1;
1455 1456
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
1457
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
L
Linus Torvalds 已提交
1458 1459 1460

	switch (cmd) {
	case SIOCGETTUNNEL:
1461
		if (dev == ip6n->fb_tnl_dev) {
1462
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
L
Linus Torvalds 已提交
1463 1464 1465
				err = -EFAULT;
				break;
			}
X
xeb@mail.ru 已提交
1466 1467
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
1468
			if (IS_ERR(t))
1469
				t = netdev_priv(dev);
1470 1471
		} else {
			memset(&p, 0, sizeof(p));
1472
		}
X
xeb@mail.ru 已提交
1473
		ip6_tnl_parm_to_user(&p, &t->parms);
1474
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) {
L
Linus Torvalds 已提交
1475 1476 1477 1478 1479 1480
			err = -EFAULT;
		}
		break;
	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
1481
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
L
Linus Torvalds 已提交
1482
			break;
1483
		err = -EFAULT;
1484
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
L
Linus Torvalds 已提交
1485
			break;
1486
		err = -EINVAL;
1487 1488
		if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
		    p.proto != 0)
L
Linus Torvalds 已提交
1489
			break;
X
xeb@mail.ru 已提交
1490 1491
		ip6_tnl_parm_from_user(&p1, &p);
		t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
1492
		if (cmd == SIOCCHGTUNNEL) {
1493
			if (!IS_ERR(t)) {
1494 1495 1496 1497 1498 1499
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else
				t = netdev_priv(dev);
1500 1501 1502 1503
			if (dev == ip6n->fb_tnl_dev)
				err = ip6_tnl0_update(t, &p1);
			else
				err = ip6_tnl_update(t, &p1);
L
Linus Torvalds 已提交
1504
		}
1505
		if (!IS_ERR(t)) {
L
Linus Torvalds 已提交
1506
			err = 0;
X
xeb@mail.ru 已提交
1507 1508
			ip6_tnl_parm_to_user(&p, &t->parms);
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1509 1510
				err = -EFAULT;

1511 1512 1513
		} else {
			err = PTR_ERR(t);
		}
L
Linus Torvalds 已提交
1514 1515 1516
		break;
	case SIOCDELTUNNEL:
		err = -EPERM;
1517
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
L
Linus Torvalds 已提交
1518 1519
			break;

1520
		if (dev == ip6n->fb_tnl_dev) {
1521
			err = -EFAULT;
1522
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
L
Linus Torvalds 已提交
1523
				break;
1524
			err = -ENOENT;
X
xeb@mail.ru 已提交
1525 1526
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
1527
			if (IS_ERR(t))
L
Linus Torvalds 已提交
1528
				break;
1529
			err = -EPERM;
1530
			if (t->dev == ip6n->fb_tnl_dev)
L
Linus Torvalds 已提交
1531
				break;
1532
			dev = t->dev;
L
Linus Torvalds 已提交
1533
		}
1534 1535
		err = 0;
		unregister_netdevice(dev);
L
Linus Torvalds 已提交
1536 1537 1538 1539 1540 1541 1542 1543
		break;
	default:
		err = -EINVAL;
	}
	return err;
}

/**
1544
 * ip6_tnl_change_mtu - change mtu manually for tunnel device
L
Linus Torvalds 已提交
1545 1546 1547 1548 1549 1550 1551 1552 1553
 *   @dev: virtual device associated with tunnel
 *   @new_mtu: the new mtu
 *
 * Return:
 *   0 on success,
 *   %-EINVAL if mtu too small
 **/

static int
1554
ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
L
Linus Torvalds 已提交
1555
{
1556 1557 1558 1559 1560 1561 1562 1563
	struct ip6_tnl *tnl = netdev_priv(dev);

	if (tnl->parms.proto == IPPROTO_IPIP) {
		if (new_mtu < 68)
			return -EINVAL;
	} else {
		if (new_mtu < IPV6_MIN_MTU)
			return -EINVAL;
L
Linus Torvalds 已提交
1564
	}
1565 1566
	if (new_mtu > 0xFFF8 - dev->hard_header_len)
		return -EINVAL;
L
Linus Torvalds 已提交
1567 1568 1569 1570
	dev->mtu = new_mtu;
	return 0;
}

1571 1572 1573 1574 1575 1576 1577
int ip6_tnl_get_iflink(const struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);

	return t->parms.link;
}
EXPORT_SYMBOL(ip6_tnl_get_iflink);
1578 1579

static const struct net_device_ops ip6_tnl_netdev_ops = {
1580
	.ndo_init	= ip6_tnl_dev_init,
E
Eric Dumazet 已提交
1581
	.ndo_uninit	= ip6_tnl_dev_uninit,
1582
	.ndo_start_xmit = ip6_tnl_xmit,
E
Eric Dumazet 已提交
1583
	.ndo_do_ioctl	= ip6_tnl_ioctl,
1584
	.ndo_change_mtu = ip6_tnl_change_mtu,
E
Eric Dumazet 已提交
1585
	.ndo_get_stats	= ip6_get_stats,
1586
	.ndo_get_iflink = ip6_tnl_get_iflink,
1587 1588
};

E
Eric Dumazet 已提交
1589

L
Linus Torvalds 已提交
1590
/**
1591
 * ip6_tnl_dev_setup - setup virtual tunnel device
L
Linus Torvalds 已提交
1592 1593 1594 1595 1596 1597
 *   @dev: virtual device associated with tunnel
 *
 * Description:
 *   Initialize function pointers and device parameters
 **/

1598
static void ip6_tnl_dev_setup(struct net_device *dev)
L
Linus Torvalds 已提交
1599
{
1600 1601
	struct ip6_tnl *t;

1602
	dev->netdev_ops = &ip6_tnl_netdev_ops;
E
Eric Dumazet 已提交
1603
	dev->destructor = ip6_dev_free;
L
Linus Torvalds 已提交
1604 1605

	dev->type = ARPHRD_TUNNEL6;
1606 1607
	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
	dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr);
1608 1609
	t = netdev_priv(dev);
	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1610
		dev->mtu -= 8;
L
Linus Torvalds 已提交
1611 1612
	dev->flags |= IFF_NOARP;
	dev->addr_len = sizeof(struct in6_addr);
1613
	netif_keep_dst(dev);
1614 1615 1616
	/* This perm addr will be used as interface identifier by IPv6 */
	dev->addr_assign_type = NET_ADDR_RANDOM;
	eth_random_addr(dev->perm_addr);
L
Linus Torvalds 已提交
1617 1618 1619 1620
}


/**
1621
 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
L
Linus Torvalds 已提交
1622 1623 1624
 *   @dev: virtual device associated with tunnel
 **/

E
Eric Dumazet 已提交
1625
static inline int
1626
ip6_tnl_dev_init_gen(struct net_device *dev)
L
Linus Torvalds 已提交
1627
{
1628
	struct ip6_tnl *t = netdev_priv(dev);
1629
	int ret;
E
Eric Dumazet 已提交
1630

L
Linus Torvalds 已提交
1631
	t->dev = dev;
N
Nicolas Dichtel 已提交
1632
	t->net = dev_net(dev);
1633
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
E
Eric Dumazet 已提交
1634 1635
	if (!dev->tstats)
		return -ENOMEM;
1636 1637 1638 1639 1640 1641 1642 1643

	ret = ip6_tnl_dst_init(t);
	if (ret) {
		free_percpu(dev->tstats);
		dev->tstats = NULL;
		return ret;
	}

E
Eric Dumazet 已提交
1644
	return 0;
L
Linus Torvalds 已提交
1645 1646 1647
}

/**
 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
 *   @dev: virtual device associated with tunnel
 *
 * Description:
 *   Run the generic initialisation, then apply the link configuration
 *   derived from the tunnel parameters.
 *
 * Return: 0 on success, the error from ip6_tnl_dev_init_gen() otherwise
 **/

static int ip6_tnl_dev_init(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	int err;

	err = ip6_tnl_dev_init_gen(dev);
	if (err)
		return err;

	ip6_tnl_link_config(t);
	return 0;
}

/**
1664
 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
L
Linus Torvalds 已提交
1665 1666 1667 1668 1669
 *   @dev: fallback device
 *
 * Return: 0
 **/

E
Eric Dumazet 已提交
1670
static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
L
Linus Torvalds 已提交
1671
{
1672
	struct ip6_tnl *t = netdev_priv(dev);
1673 1674 1675
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

1676
	t->parms.proto = IPPROTO_IPV6;
L
Linus Torvalds 已提交
1677
	dev_hold(dev);
1678

1679
	rcu_assign_pointer(ip6n->tnls_wc[0], t);
E
Eric Dumazet 已提交
1680
	return 0;
L
Linus Torvalds 已提交
1681 1682
}

1683 1684 1685 1686
/* rtnl validate hook: when IFLA_IPTUN_PROTO is supplied it must be
 * IPPROTO_IPV6, IPPROTO_IPIP or 0.  Returns 0 when acceptable (or when the
 * attribute is absent), -EINVAL otherwise.
 */
static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
{
	if (!data || !data[IFLA_IPTUN_PROTO])
		return 0;

	switch (nla_get_u8(data[IFLA_IPTUN_PROTO])) {
	case IPPROTO_IPV6:
	case IPPROTO_IPIP:
	case 0:
		return 0;
	default:
		return -EINVAL;
	}
}

static void ip6_tnl_netlink_parms(struct nlattr *data[],
				  struct __ip6_tnl_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	if (!data)
		return;

	if (data[IFLA_IPTUN_LINK])
		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);

	if (data[IFLA_IPTUN_LOCAL])
1711
		parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]);
1712 1713

	if (data[IFLA_IPTUN_REMOTE])
1714
		parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]);
1715 1716 1717 1718 1719 1720 1721 1722

	if (data[IFLA_IPTUN_TTL])
		parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);

	if (data[IFLA_IPTUN_ENCAP_LIMIT])
		parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);

	if (data[IFLA_IPTUN_FLOWINFO])
1723
		parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);
1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735

	if (data[IFLA_IPTUN_FLAGS])
		parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);

	if (data[IFLA_IPTUN_PROTO])
		parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
}

static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
			   struct nlattr *tb[], struct nlattr *data[])
{
	struct net *net = dev_net(dev);
1736
	struct ip6_tnl *nt, *t;
1737 1738 1739 1740

	nt = netdev_priv(dev);
	ip6_tnl_netlink_parms(data, &nt->parms);

1741 1742
	t = ip6_tnl_locate(net, &nt->parms, 0);
	if (!IS_ERR(t))
1743 1744 1745 1746 1747 1748 1749 1750
		return -EEXIST;

	return ip6_tnl_create2(dev);
}

static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
			      struct nlattr *data[])
{
N
Nicolas Dichtel 已提交
1751
	struct ip6_tnl *t = netdev_priv(dev);
1752
	struct __ip6_tnl_parm p;
N
Nicolas Dichtel 已提交
1753
	struct net *net = t->net;
1754 1755 1756 1757 1758 1759 1760 1761
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	if (dev == ip6n->fb_tnl_dev)
		return -EINVAL;

	ip6_tnl_netlink_parms(data, &p);

	t = ip6_tnl_locate(net, &p, 0);
1762
	if (!IS_ERR(t)) {
1763 1764 1765 1766 1767 1768 1769 1770
		if (t->dev != dev)
			return -EEXIST;
	} else
		t = netdev_priv(dev);

	return ip6_tnl_update(t, &p);
}

1771 1772 1773 1774 1775 1776 1777 1778 1779
/* rtnl dellink hook: queue @dev for unregistration, except for the
 * namespace's fallback device, which is left alone.
 */
static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip6_tnl_net *ip6n = net_generic(dev_net(dev), ip6_tnl_net_id);

	if (dev == ip6n->fb_tnl_dev)
		return;

	unregister_netdevice_queue(dev, head);
}

1780
static size_t ip6_tnl_get_size(const struct net_device *dev)
1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796
{
	return
		/* IFLA_IPTUN_LINK */
		nla_total_size(4) +
		/* IFLA_IPTUN_LOCAL */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_IPTUN_REMOTE */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_IPTUN_TTL */
		nla_total_size(1) +
		/* IFLA_IPTUN_ENCAP_LIMIT */
		nla_total_size(1) +
		/* IFLA_IPTUN_FLOWINFO */
		nla_total_size(4) +
		/* IFLA_IPTUN_FLAGS */
		nla_total_size(4) +
1797 1798
		/* IFLA_IPTUN_PROTO */
		nla_total_size(1) +
1799 1800 1801
		0;
}

1802
static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
1803 1804 1805 1806 1807
{
	struct ip6_tnl *tunnel = netdev_priv(dev);
	struct __ip6_tnl_parm *parm = &tunnel->parms;

	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
1808 1809
	    nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
	    nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) ||
1810 1811 1812
	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
	    nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
	    nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
1813 1814
	    nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
1815 1816 1817 1818 1819 1820 1821
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

1822 1823 1824 1825 1826 1827 1828 1829
struct net *ip6_tnl_get_link_net(const struct net_device *dev)
{
	struct ip6_tnl *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip6_tnl_get_link_net);

1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840
/* Netlink attribute policy for the IFLA_IPTUN_* attributes; installed as
 * the .policy of ip6_link_ops.  The two address attributes are described
 * by length (one struct in6_addr) rather than by type.
 */
static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
	[IFLA_IPTUN_LINK]		= { .type = NLA_U32 },
	[IFLA_IPTUN_LOCAL]		= { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_REMOTE]		= { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_TTL]		= { .type = NLA_U8 },
	[IFLA_IPTUN_ENCAP_LIMIT]	= { .type = NLA_U8 },
	[IFLA_IPTUN_FLOWINFO]		= { .type = NLA_U32 },
	[IFLA_IPTUN_FLAGS]		= { .type = NLA_U32 },
	[IFLA_IPTUN_PROTO]		= { .type = NLA_U8 },
};

1841 1842 1843
/* rtnetlink link operations for devices of kind "ip6tnl". */
static struct rtnl_link_ops ip6_link_ops __read_mostly = {
	.kind		= "ip6tnl",
	.maxtype	= IFLA_IPTUN_MAX,
	.policy		= ip6_tnl_policy,
	.priv_size	= sizeof(struct ip6_tnl),

	/* device lifecycle */
	.setup		= ip6_tnl_dev_setup,
	.validate	= ip6_tnl_validate,
	.newlink	= ip6_tnl_newlink,
	.changelink	= ip6_tnl_changelink,
	.dellink	= ip6_tnl_dellink,

	/* dumping */
	.get_size	= ip6_tnl_get_size,
	.fill_info	= ip6_tnl_fill_info,
	.get_link_net	= ip6_tnl_get_link_net,
};

1856
/* Receive/error handlers registered for AF_INET by ip6_tunnel_init(). */
static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
	.handler	= ip4ip6_rcv,
	.err_handler	= ip4ip6_err,
	.priority	= 1,
};

1862
/* Receive/error handlers registered for AF_INET6 by ip6_tunnel_init(). */
static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
	.handler	= ip6ip6_rcv,
	.err_handler	= ip6ip6_err,
	.priority	= 1,
};

1868
/* Unregister every ip6tnl device tied to @net.
 *
 * First all devices in @net that use ip6_link_ops are queued, then the
 * tnls_r_l hash table is walked to also queue tunnels whose device sits in
 * a different namespace.  Everything queued is unregistered in one batch.
 */
static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
{
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct net_device *dev, *aux;
	struct ip6_tnl *t;
	LIST_HEAD(list);
	int h;

	for_each_netdev_safe(net, dev, aux) {
		if (dev->rtnl_link_ops == &ip6_link_ops)
			unregister_netdevice_queue(dev, &list);
	}

	for (h = 0; h < HASH_SIZE; h++) {
		for (t = rtnl_dereference(ip6n->tnls_r_l[h]); t;
		     t = rtnl_dereference(t->next)) {
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, &list);
		}
	}

	unregister_netdevice_many(&list);
}

1895
/* Per-namespace init: set up the tunnel tables and create, initialise and
 * register the fallback device "ip6tnl0".
 *
 * Returns 0 on success, -ENOMEM when the device cannot be allocated, or
 * the error from initialisation/registration (the device is freed then).
 */
static int __net_init ip6_tnl_init_net(struct net *net)
{
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct net_device *fb_dev;
	struct ip6_tnl *fb_tnl;
	int err;

	ip6n->tnls[0] = ip6n->tnls_wc;
	ip6n->tnls[1] = ip6n->tnls_r_l;

	fb_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
			      NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
	ip6n->fb_tnl_dev = fb_dev;
	if (!fb_dev)
		return -ENOMEM;

	dev_net_set(fb_dev, net);
	fb_dev->rtnl_link_ops = &ip6_link_ops;
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	fb_dev->features |= NETIF_F_NETNS_LOCAL;

	err = ip6_fb_tnl_dev_init(fb_dev);
	if (err < 0)
		goto err_register;

	err = register_netdev(fb_dev);
	if (err < 0)
		goto err_register;

	/* Record the registered device name in the tunnel parameters. */
	fb_tnl = netdev_priv(fb_dev);
	strcpy(fb_tnl->parms.name, fb_dev->name);
	return 0;

err_register:
	ip6_dev_free(fb_dev);
	return err;
}

1936
/* Per-namespace exit: tear down all tunnels of @net while holding the
 * rtnl lock.
 */
static void __net_exit ip6_tnl_exit_net(struct net *net)
{
	rtnl_lock();
	ip6_tnl_destroy_tunnels(net);
	rtnl_unlock();
}

static struct pernet_operations ip6_tnl_net_ops = {
	.init = ip6_tnl_init_net,
	.exit = ip6_tnl_exit_net,
1946 1947
	.id   = &ip6_tnl_net_id,
	.size = sizeof(struct ip6_tnl_net),
1948 1949
};

L
Linus Torvalds 已提交
1950 1951 1952 1953 1954 1955 1956 1957 1958 1959
/**
 * ip6_tunnel_init - register protocol and reserve needed resources
 *
 * Description:
 *   Registers, in order, the per-namespace operations, the ip4ip6 and
 *   ip6ip6 tunnel handlers and the rtnetlink link operations.  If one
 *   step fails, the steps already done are undone in reverse order.
 *
 * Return: 0 on success, the error of the failing step otherwise
 **/

static int __init ip6_tunnel_init(void)
{
	int err;

	err = register_pernet_device(&ip6_tnl_net_ops);
	if (err < 0)
		return err;

	err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
	if (err < 0) {
		pr_err("%s: can't register ip4ip6\n", __func__);
		goto undo_pernet;
	}

	err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
	if (err < 0) {
		pr_err("%s: can't register ip6ip6\n", __func__);
		goto undo_ip4ip6;
	}

	err = rtnl_link_register(&ip6_link_ops);
	if (err < 0)
		goto undo_ip6ip6;

	return 0;

undo_ip6ip6:
	xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
undo_ip4ip6:
	xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
undo_pernet:
	unregister_pernet_device(&ip6_tnl_net_ops);
	return err;
}

/**
 * ip6_tunnel_cleanup - free resources and unregister protocol
 *
 * Description:
 *   Unregisters the rtnetlink link operations, both tunnel handlers
 *   (logging a message when deregistration fails) and finally the
 *   per-namespace operations.
 **/

static void __exit ip6_tunnel_cleanup(void)
{
	int failed;

	rtnl_link_unregister(&ip6_link_ops);

	failed = xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
	if (failed)
		pr_info("%s: can't deregister ip4ip6\n", __func__);

	failed = xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
	if (failed)
		pr_info("%s: can't deregister ip6ip6\n", __func__);

	unregister_pernet_device(&ip6_tnl_net_ops);
}

module_init(ip6_tunnel_init);
module_exit(ip6_tunnel_cleanup);