/*
 *	IPv6 fragment reassembly
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on: net/ipv4/ip_fragment.c
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*
 *	Fixes:
 *	Andi Kleen	Make it work with multiple hosts.
 *			More RFC compliance.
 *
 *      Horst von Brand Add missing #include <linux/string.h>
 *	Alexey Kuznetsov	SMP races, threading, cleanup.
 *	Patrick McHardy		LRU queue of frag heads for evictor.
 *	Mitsuru KANDA @USAGI	Register inet6_protocol{}.
 *	David Stevens and
 *	YOSHIFUJI,H. @USAGI	Always remove fragment header to
 *				calculate ICV correctly.
 */

/* Message prefix for this file; fq_find() passes pr_fmt() as its warning prefix. */
#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/jiffies.h>
#include <linux/net.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/export.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>

/* Name stored in ip6_frags.frags_cache_name by ipv6_frag_init(). */
static const char ip6_frag_cache_name[] = "ip6-frags";

struct ip6frag_skb_cb {
L
Linus Torvalds 已提交
66 67 68 69
	struct inet6_skb_parm	h;
	int			offset;
};

70
/* View an skb's control buffer as a struct ip6frag_skb_cb. */
#define FRAG6_CB(skb)	((struct ip6frag_skb_cb *)((skb)->cb))

static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
73 74 75
{
	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
L
Linus Torvalds 已提交
76

77
/* IPv6 fragment-queue descriptor; its fields are filled in by ipv6_frag_init(). */
static struct inet_frags ip6_frags;

/* Forward declaration: ip6_frag_queue() calls this ahead of its definition. */
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
			  struct net_device *dev);

void ip6_frag_init(struct inet_frag_queue *q, const void *a)
L
Linus Torvalds 已提交
83
{
84
	struct frag_queue *fq = container_of(q, struct frag_queue, q);
85
	const struct frag_v6_compare_key *key = a;
86

87 88
	q->key.v6 = *key;
	fq->ecn = 0;
L
Linus Torvalds 已提交
89
}
90
EXPORT_SYMBOL(ip6_frag_init);
L
Linus Torvalds 已提交
91

92
void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
L
Linus Torvalds 已提交
93
{
94
	struct net_device *dev = NULL;
95

96
	spin_lock(&fq->q.lock);
L
Linus Torvalds 已提交
97

98
	if (fq->q.flags & INET_FRAG_COMPLETE)
L
Linus Torvalds 已提交
99 100
		goto out;

101
	inet_frag_kill(&fq->q);
L
Linus Torvalds 已提交
102

E
Eric Dumazet 已提交
103 104
	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, fq->iif);
105
	if (!dev)
E
Eric Dumazet 已提交
106
		goto out_rcu_unlock;
107

E
Eric Dumazet 已提交
108
	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
L
Linus Torvalds 已提交
109

110
	if (inet_frag_evicting(&fq->q))
111 112
		goto out_rcu_unlock;

E
Eric Dumazet 已提交
113
	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
114

115
	/* Don't send error if the first segment did not arrive. */
116
	if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
E
Eric Dumazet 已提交
117
		goto out_rcu_unlock;
118

119 120 121
	/* But use as source device on which LAST ARRIVED
	 * segment was received. And do not use fq->dev
	 * pointer directly, device might already disappeared.
122
	 */
123
	fq->q.fragments->dev = dev;
124
	icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
E
Eric Dumazet 已提交
125 126
out_rcu_unlock:
	rcu_read_unlock();
L
Linus Torvalds 已提交
127
out:
128
	spin_unlock(&fq->q.lock);
129
	inet_frag_put(&fq->q);
130 131 132
}
EXPORT_SYMBOL(ip6_expire_frag_queue);

133
static void ip6_frag_expire(struct timer_list *t)
134
{
135
	struct inet_frag_queue *frag = from_timer(frag, t, timer);
136 137 138
	struct frag_queue *fq;
	struct net *net;

139
	fq = container_of(frag, struct frag_queue, q);
140 141
	net = container_of(fq->q.net, struct net, ipv6.frags);

142
	ip6_expire_frag_queue(net, fq);
L
Linus Torvalds 已提交
143 144
}

145
static struct frag_queue *
146
fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
L
Linus Torvalds 已提交
147
{
148 149 150 151 152 153 154
	struct frag_v6_compare_key key = {
		.id = id,
		.saddr = hdr->saddr,
		.daddr = hdr->daddr,
		.user = IP6_DEFRAG_LOCAL_DELIVER,
		.iif = iif,
	};
155
	struct inet_frag_queue *q;
156

157 158 159
	if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
					    IPV6_ADDR_LINKLOCAL)))
		key.iif = 0;
L
Linus Torvalds 已提交
160

161
	q = inet_frag_find(&net->ipv6.frags, &key);
162 163
	if (IS_ERR_OR_NULL(q)) {
		inet_frag_maybe_warn_overflow(q, pr_fmt());
164
		return NULL;
165
	}
166
	return container_of(q, struct frag_queue, q);
L
Linus Torvalds 已提交
167 168
}

169
static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
L
Linus Torvalds 已提交
170 171 172
			   struct frag_hdr *fhdr, int nhoff)
{
	struct sk_buff *prev, *next;
173
	struct net_device *dev;
174
	int offset, end, fragsize;
E
Eric Dumazet 已提交
175
	struct net *net = dev_net(skb_dst(skb)->dev);
176
	u8 ecn;
L
Linus Torvalds 已提交
177

178
	if (fq->q.flags & INET_FRAG_COMPLETE)
L
Linus Torvalds 已提交
179 180 181
		goto err;

	offset = ntohs(fhdr->frag_off) & ~0x7;
182 183
	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
L
Linus Torvalds 已提交
184 185

	if ((unsigned int)end > IPV6_MAXPLEN) {
E
Eric Dumazet 已提交
186 187
		__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				IPSTATS_MIB_INHDRERRORS);
188 189 190
		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
				  ((u8 *)&fhdr->frag_off -
				   skb_network_header(skb)));
191
		return -1;
L
Linus Torvalds 已提交
192 193
	}

194 195
	ecn = ip6_frag_ecn(ipv6_hdr(skb));

196 197
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		const unsigned char *nh = skb_network_header(skb);
198
		skb->csum = csum_sub(skb->csum,
199 200 201
				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
						  0));
	}
L
Linus Torvalds 已提交
202 203 204 205 206 207

	/* Is this the final fragment? */
	if (!(fhdr->frag_off & htons(IP6_MF))) {
		/* If we already have some bits beyond end
		 * or have different end, the segment is corrupted.
		 */
208
		if (end < fq->q.len ||
209
		    ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
L
Linus Torvalds 已提交
210
			goto err;
211
		fq->q.flags |= INET_FRAG_LAST_IN;
212
		fq->q.len = end;
L
Linus Torvalds 已提交
213 214 215 216 217 218 219 220
	} else {
		/* Check if the fragment is rounded to 8 bytes.
		 * Required by the RFC.
		 */
		if (end & 0x7) {
			/* RFC2460 says always send parameter problem in
			 * this case. -DaveM
			 */
E
Eric Dumazet 已提交
221 222
			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					IPSTATS_MIB_INHDRERRORS);
223
			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
L
Linus Torvalds 已提交
224
					  offsetof(struct ipv6hdr, payload_len));
225
			return -1;
L
Linus Torvalds 已提交
226
		}
227
		if (end > fq->q.len) {
L
Linus Torvalds 已提交
228
			/* Some bits beyond end -> corruption. */
229
			if (fq->q.flags & INET_FRAG_LAST_IN)
L
Linus Torvalds 已提交
230
				goto err;
231
			fq->q.len = end;
L
Linus Torvalds 已提交
232 233 234 235 236 237 238 239 240
		}
	}

	if (end == offset)
		goto err;

	/* Point into the IP datagram 'data' part. */
	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
		goto err;
241

242 243
	if (pskb_trim_rcsum(skb, end - offset))
		goto err;
L
Linus Torvalds 已提交
244 245 246 247 248

	/* Find out which fragments are in front and at the back of us
	 * in the chain of fragments so far.  We must know where to put
	 * this fragment, right?
	 */
249 250 251 252 253
	prev = fq->q.fragments_tail;
	if (!prev || FRAG6_CB(prev)->offset < offset) {
		next = NULL;
		goto found;
	}
L
Linus Torvalds 已提交
254
	prev = NULL;
255
	for (next = fq->q.fragments; next != NULL; next = next->next) {
L
Linus Torvalds 已提交
256 257 258 259 260
		if (FRAG6_CB(next)->offset >= offset)
			break;	/* bingo! */
		prev = next;
	}

261
found:
E
Eric Dumazet 已提交
262 263
	/* RFC5722, Section 4, amended by Errata ID : 3089
	 *                          When reassembling an IPv6 datagram, if
264 265
	 *   one or more its constituent fragments is determined to be an
	 *   overlapping fragment, the entire datagram (and any constituent
E
Eric Dumazet 已提交
266
	 *   fragments) MUST be silently discarded.
L
Linus Torvalds 已提交
267 268
	 */

269 270
	/* Check for overlap with preceding fragment. */
	if (prev &&
S
Shan Wei 已提交
271
	    (FRAG6_CB(prev)->offset + prev->len) > offset)
272
		goto discard_fq;
L
Linus Torvalds 已提交
273

274 275 276
	/* Look for overlap with succeeding segment. */
	if (next && FRAG6_CB(next)->offset < end)
		goto discard_fq;
L
Linus Torvalds 已提交
277 278 279 280 281

	FRAG6_CB(skb)->offset = offset;

	/* Insert this fragment in the chain of fragments. */
	skb->next = next;
282 283
	if (!next)
		fq->q.fragments_tail = skb;
L
Linus Torvalds 已提交
284 285 286
	if (prev)
		prev->next = skb;
	else
287
		fq->q.fragments = skb;
L
Linus Torvalds 已提交
288

289 290 291 292 293
	dev = skb->dev;
	if (dev) {
		fq->iif = dev->ifindex;
		skb->dev = NULL;
	}
294 295
	fq->q.stamp = skb->tstamp;
	fq->q.meat += skb->len;
296
	fq->ecn |= ecn;
297
	add_frag_mem_limit(fq->q.net, skb->truesize);
L
Linus Torvalds 已提交
298

299 300 301 302
	fragsize = -skb_network_offset(skb) + skb->len;
	if (fragsize > fq->q.max_size)
		fq->q.max_size = fragsize;

L
Linus Torvalds 已提交
303 304 305 306 307
	/* The first fragment.
	 * nhoffset is obtained from the first fragment, of course.
	 */
	if (offset == 0) {
		fq->nhoffset = nhoff;
308
		fq->q.flags |= INET_FRAG_FIRST_IN;
L
Linus Torvalds 已提交
309
	}
310

311
	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
312 313 314 315 316 317 318 319 320
	    fq->q.meat == fq->q.len) {
		int res;
		unsigned long orefdst = skb->_skb_refdst;

		skb->_skb_refdst = 0UL;
		res = ip6_frag_reasm(fq, prev, dev);
		skb->_skb_refdst = orefdst;
		return res;
	}
321

322
	skb_dst_drop(skb);
323
	return -1;
L
Linus Torvalds 已提交
324

325
discard_fq:
326
	inet_frag_kill(&fq->q);
L
Linus Torvalds 已提交
327
err:
E
Eric Dumazet 已提交
328 329
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_REASMFAILS);
L
Linus Torvalds 已提交
330
	kfree_skb(skb);
331
	return -1;
L
Linus Torvalds 已提交
332 333 334 335 336 337 338 339 340 341 342
}

/*
 *	Check if this packet is complete.
 *	Returns NULL on failure by any reason, and pointer
 *	to current nexthdr field in reassembled frame.
 *
 *	It is called with locked fq, and caller must check that
 *	queue is eligible for reassembly i.e. it is not COMPLETE,
 *	the last and the first frames arrived and all the bits are here.
 */
343
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
L
Linus Torvalds 已提交
344 345
			  struct net_device *dev)
{
346
	struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
347
	struct sk_buff *fp, *head = fq->q.fragments;
L
Linus Torvalds 已提交
348 349
	int    payload_len;
	unsigned int nhoff;
350
	int sum_truesize;
351
	u8 ecn;
L
Linus Torvalds 已提交
352

353
	inet_frag_kill(&fq->q);
L
Linus Torvalds 已提交
354

355 356 357 358
	ecn = ip_frag_ecn_table[fq->ecn];
	if (unlikely(ecn == 0xff))
		goto out_fail;

359 360 361 362 363 364 365 366 367
	/* Make the one we just received the head. */
	if (prev) {
		head = prev->next;
		fp = skb_clone(head, GFP_ATOMIC);

		if (!fp)
			goto out_oom;

		fp->next = head->next;
368 369
		if (!fp->next)
			fq->q.fragments_tail = fp;
370 371
		prev->next = fp;

372 373
		skb_morph(head, fq->q.fragments);
		head->next = fq->q.fragments->next;
374

375
		consume_skb(fq->q.fragments);
376
		fq->q.fragments = head;
377 378
	}

379 380
	WARN_ON(head == NULL);
	WARN_ON(FRAG6_CB(head)->offset != 0);
L
Linus Torvalds 已提交
381 382

	/* Unfragmented part is taken from the first segment. */
383
	payload_len = ((head->data - skb_network_header(head)) -
384
		       sizeof(struct ipv6hdr) + fq->q.len -
385
		       sizeof(struct frag_hdr));
L
Linus Torvalds 已提交
386 387 388 389
	if (payload_len > IPV6_MAXPLEN)
		goto out_oversize;

	/* Head of list must not be cloned. */
390
	if (skb_unclone(head, GFP_ATOMIC))
L
Linus Torvalds 已提交
391 392 393 394 395
		goto out_oom;

	/* If the first fragment is fragmented itself, we split
	 * it to two chunks: the first with data and paged part
	 * and the second, holding only fragments. */
396
	if (skb_has_frag_list(head)) {
L
Linus Torvalds 已提交
397 398 399
		struct sk_buff *clone;
		int i, plen = 0;

400
		clone = alloc_skb(0, GFP_ATOMIC);
401
		if (!clone)
L
Linus Torvalds 已提交
402 403 404 405
			goto out_oom;
		clone->next = head->next;
		head->next = clone;
		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
406
		skb_frag_list_init(head);
E
Eric Dumazet 已提交
407 408
		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
L
Linus Torvalds 已提交
409 410 411 412 413
		clone->len = clone->data_len = head->data_len - plen;
		head->data_len -= clone->len;
		head->len -= clone->len;
		clone->csum = 0;
		clone->ip_summed = head->ip_summed;
414
		add_frag_mem_limit(fq->q.net, clone->truesize);
L
Linus Torvalds 已提交
415 416 417 418 419
	}

	/* We have to remove fragment header from datagram and to relocate
	 * header in order to calculate ICV correctly. */
	nhoff = fq->nhoffset;
420
	skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
421
	memmove(head->head + sizeof(struct frag_hdr), head->head,
L
Linus Torvalds 已提交
422
		(head->data - head->head) - sizeof(struct frag_hdr));
423 424
	if (skb_mac_header_was_set(head))
		head->mac_header += sizeof(struct frag_hdr);
425
	head->network_header += sizeof(struct frag_hdr);
L
Linus Torvalds 已提交
426

427
	skb_reset_transport_header(head);
428
	skb_push(head, head->data - skb_network_header(head));
L
Linus Torvalds 已提交
429

430 431 432 433 434 435 436
	sum_truesize = head->truesize;
	for (fp = head->next; fp;) {
		bool headstolen;
		int delta;
		struct sk_buff *next = fp->next;

		sum_truesize += fp->truesize;
L
Linus Torvalds 已提交
437 438
		if (head->ip_summed != fp->ip_summed)
			head->ip_summed = CHECKSUM_NONE;
439
		else if (head->ip_summed == CHECKSUM_COMPLETE)
L
Linus Torvalds 已提交
440
			head->csum = csum_add(head->csum, fp->csum);
441 442 443 444 445 446 447 448 449 450 451

		if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
			kfree_skb_partial(fp, headstolen);
		} else {
			if (!skb_shinfo(head)->frag_list)
				skb_shinfo(head)->frag_list = fp;
			head->data_len += fp->len;
			head->len += fp->len;
			head->truesize += fp->truesize;
		}
		fp = next;
L
Linus Torvalds 已提交
452
	}
453
	sub_frag_mem_limit(fq->q.net, sum_truesize);
L
Linus Torvalds 已提交
454 455 456

	head->next = NULL;
	head->dev = dev;
457
	head->tstamp = fq->q.stamp;
458
	ipv6_hdr(head)->payload_len = htons(payload_len);
459
	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
460
	IP6CB(head)->nhoff = nhoff;
461
	IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
462
	IP6CB(head)->frag_max_size = fq->q.max_size;
L
Linus Torvalds 已提交
463 464

	/* Yes, and fold redundant checksum back. 8) */
465 466
	skb_postpush_rcsum(head, skb_network_header(head),
			   skb_network_header_len(head));
L
Linus Torvalds 已提交
467

468
	rcu_read_lock();
E
Eric Dumazet 已提交
469
	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
470
	rcu_read_unlock();
471
	fq->q.fragments = NULL;
472
	fq->q.fragments_tail = NULL;
L
Linus Torvalds 已提交
473 474 475
	return 1;

out_oversize:
476
	net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len);
L
Linus Torvalds 已提交
477 478
	goto out_fail;
out_oom:
479
	net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
L
Linus Torvalds 已提交
480
out_fail:
481
	rcu_read_lock();
E
Eric Dumazet 已提交
482
	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
483
	rcu_read_unlock();
L
Linus Torvalds 已提交
484 485 486
	return -1;
}

487
static int ipv6_frag_rcv(struct sk_buff *skb)
L
Linus Torvalds 已提交
488 489 490
{
	struct frag_hdr *fhdr;
	struct frag_queue *fq;
491
	const struct ipv6hdr *hdr = ipv6_hdr(skb);
E
Eric Dumazet 已提交
492
	struct net *net = dev_net(skb_dst(skb)->dev);
493
	int iif;
L
Linus Torvalds 已提交
494

495 496 497
	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
		goto fail_hdr;

E
Eric Dumazet 已提交
498
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
L
Linus Torvalds 已提交
499 500

	/* Jumbo payload inhibits frag. header */
501
	if (hdr->payload_len == 0)
502 503
		goto fail_hdr;

504
	if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
505 506
				 sizeof(struct frag_hdr))))
		goto fail_hdr;
L
Linus Torvalds 已提交
507

508
	hdr = ipv6_hdr(skb);
509
	fhdr = (struct frag_hdr *)skb_transport_header(skb);
L
Linus Torvalds 已提交
510 511 512

	if (!(fhdr->frag_off & htons(0xFFF9))) {
		/* It is not a fragmented frame */
513
		skb->transport_header += sizeof(struct frag_hdr);
E
Eric Dumazet 已提交
514 515
		__IP6_INC_STATS(net,
				ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
L
Linus Torvalds 已提交
516

517
		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
518
		IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
L
Linus Torvalds 已提交
519 520 521
		return 1;
	}

522 523
	iif = skb->dev ? skb->dev->ifindex : 0;
	fq = fq_find(net, fhdr->identification, hdr, iif);
524
	if (fq) {
525
		int ret;
L
Linus Torvalds 已提交
526

527
		spin_lock(&fq->q.lock);
L
Linus Torvalds 已提交
528

529
		fq->iif = iif;
530
		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
L
Linus Torvalds 已提交
531

532
		spin_unlock(&fq->q.lock);
533
		inet_frag_put(&fq->q);
L
Linus Torvalds 已提交
534 535 536
		return ret;
	}

E
Eric Dumazet 已提交
537
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
L
Linus Torvalds 已提交
538 539
	kfree_skb(skb);
	return -1;
540 541

fail_hdr:
E
Eric Dumazet 已提交
542 543
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_INHDRERRORS);
544 545
	icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
	return -1;
L
Linus Torvalds 已提交
546 547
}

548
static const struct inet6_protocol frag_protocol = {
L
Linus Torvalds 已提交
549 550 551 552
	.handler	=	ipv6_frag_rcv,
	.flags		=	INET6_PROTO_NOPOLICY,
};

553
#ifdef CONFIG_SYSCTL
/* Referenced as extra1 by the ip6frag_low_thresh sysctl entry. */
static int zero;

static struct ctl_table ip6_frags_ns_ctl_table[] = {
557 558
	{
		.procname	= "ip6frag_high_thresh",
559
		.data		= &init_net.ipv6.frags.high_thresh,
560 561
		.maxlen		= sizeof(int),
		.mode		= 0644,
562 563
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &init_net.ipv6.frags.low_thresh
564 565 566
	},
	{
		.procname	= "ip6frag_low_thresh",
567
		.data		= &init_net.ipv6.frags.low_thresh,
568 569
		.maxlen		= sizeof(int),
		.mode		= 0644,
570 571 572
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &init_net.ipv6.frags.high_thresh
573 574 575
	},
	{
		.procname	= "ip6frag_time",
576
		.data		= &init_net.ipv6.frags.timeout,
577 578
		.maxlen		= sizeof(int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
579
		.proc_handler	= proc_dointvec_jiffies,
580
	},
581 582 583
	{ }
};

584 585
/* secret interval has been deprecated; this only backs the sysctl entry */
static int ip6_frags_secret_interval_unused;
static struct ctl_table ip6_frags_ctl_table[] = {
587 588
	{
		.procname	= "ip6frag_secret_interval",
589
		.data		= &ip6_frags_secret_interval_unused,
590 591
		.maxlen		= sizeof(int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
592
		.proc_handler	= proc_dointvec_jiffies,
593 594 595 596
	},
	{ }
};

597
/*
 * Register the per-namespace fragment sysctls under "net/ipv6".
 *
 * init_net uses ip6_frags_ns_ctl_table directly. Every other namespace
 * gets a kmemdup()'d copy whose data/extra pointers are redirected to that
 * namespace's own values; only the upper bound of its high threshold
 * (extra2) still points at init_net's high_thresh.
 *
 * Returns 0 on success or -ENOMEM when the copy or the registration fails.
 */
static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
{
	struct ctl_table *table;
	struct ctl_table_header *hdr;

	table = ip6_frags_ns_ctl_table;
	if (!net_eq(net, &init_net)) {
		table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
		if (!table)
			goto err_alloc;

		table[0].data = &net->ipv6.frags.high_thresh;
		table[0].extra1 = &net->ipv6.frags.low_thresh;
		table[0].extra2 = &init_net.ipv6.frags.high_thresh;
		table[1].data = &net->ipv6.frags.low_thresh;
		table[1].extra2 = &net->ipv6.frags.high_thresh;
		table[2].data = &net->ipv6.frags.timeout;
	}

	hdr = register_net_sysctl(net, "net/ipv6", table);
	if (!hdr)
		goto err_reg;

	net->ipv6.sysctl.frags_hdr = hdr;
	return 0;

err_reg:
	/* Only the duplicated table is heap-allocated. */
	if (!net_eq(net, &init_net))
		kfree(table);
err_alloc:
	return -ENOMEM;
}

/*
 * Undo ip6_frags_ns_sysctl_register() for a namespace.
 *
 * The table pointer is read from the header before unregistering, so the
 * duplicated table of a non-init namespace can be freed afterwards.
 */
static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net)
{
	struct ctl_table *table;

	table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
	if (!net_eq(net, &init_net))
		kfree(table);
}

/* Handle for ip6_frags_ctl_table; set by ip6_frags_sysctl_register(). */
static struct ctl_table_header *ip6_ctl_header;

static int ip6_frags_sysctl_register(void)
{
644
	ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6",
645 646 647 648 649 650 651 652
			ip6_frags_ctl_table);
	return ip6_ctl_header == NULL ? -ENOMEM : 0;
}

/* Remove the table registered by ip6_frags_sysctl_register(). */
static void ip6_frags_sysctl_unregister(void)
{
	unregister_net_sysctl_table(ip6_ctl_header);
}
#else
/* !CONFIG_SYSCTL: registration helpers become no-ops that report success. */
static int ip6_frags_ns_sysctl_register(struct net *net)
{
	return 0;
}

static void ip6_frags_ns_sysctl_unregister(struct net *net)
{
}

static int ip6_frags_sysctl_register(void)
{
	return 0;
}

static void ip6_frags_sysctl_unregister(void)
{
}
#endif

/*
 * Per-namespace init: set the default thresholds and timeout, bind the
 * namespace's fragment state to ip6_frags, then initialise it and
 * register its sysctls.
 *
 * Returns 0 on success or a negative error; if sysctl registration fails,
 * the fragment state initialised just before is torn down again.
 */
static int __net_init ipv6_frags_init_net(struct net *net)
{
	int res;

	net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
	net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
	net->ipv6.frags.f = &ip6_frags;

	res = inet_frags_init_net(&net->ipv6.frags);
	if (res < 0)
		return res;

	res = ip6_frags_ns_sysctl_register(net);
	if (res < 0)
		inet_frags_exit_net(&net->ipv6.frags);
	return res;
}

/* Per-namespace exit: unregister the sysctls, then tear down the fragment state. */
static void __net_exit ipv6_frags_exit_net(struct net *net)
{
	ip6_frags_ns_sysctl_unregister(net);
	inet_frags_exit_net(&net->ipv6.frags);
}

/* Per-namespace hooks, registered by ipv6_frag_init(). */
static struct pernet_operations ip6_frags_ops = {
	.init = ipv6_frags_init_net,
	.exit = ipv6_frags_exit_net,
};

/*
 * Hash a lookup key for the fragment hash table.
 * @data: the key, a struct frag_v6_compare_key
 * @len:  ignored; the whole key structure is always hashed
 * @seed: initial value passed to jhash2()
 */
static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
{
	const u32 nwords = sizeof(struct frag_v6_compare_key) / sizeof(u32);

	return jhash2(data, nwords, seed);
}

/*
 * Hash a stored fragment queue by its IPv6 key, so that it yields the same
 * value ip6_key_hashfn() gives for that key.
 * @data: the queue, a struct inet_frag_queue
 * @len:  ignored; the whole key structure is always hashed
 * @seed: initial value passed to jhash2()
 */
static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const struct inet_frag_queue *queue = data;
	const u32 *words = (const u32 *)&queue->key.v6;
	const u32 nwords = sizeof(struct frag_v6_compare_key) / sizeof(u32);

	return jhash2(words, nwords, seed);
}

/*
 * Compare a stored queue's key with a lookup key, byte for byte.
 * @arg: compare argument whose ->key is a struct frag_v6_compare_key
 * @ptr: the stored queue, a struct inet_frag_queue
 *
 * Returns 0 when the keys are identical and 1 when they differ.
 */
static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
{
	const struct inet_frag_queue *queue = ptr;
	const struct frag_v6_compare_key *wanted = arg->key;

	if (memcmp(&queue->key, wanted, sizeof(*wanted)) != 0)
		return 1;
	return 0;
}

/* Hash-table parameters for IPv6 fragment queues; copied into ip6_frags by ipv6_frag_init(). */
const struct rhashtable_params ip6_rhash_params = {
	.head_offset		= offsetof(struct inet_frag_queue, node),
	.hashfn			= ip6_key_hashfn,
	.obj_hashfn		= ip6_obj_hashfn,
	.obj_cmpfn		= ip6_obj_cmpfn,
	.automatic_shrinking	= true,
};
EXPORT_SYMBOL(ip6_rhash_params);

/*
 * Set up IPv6 fragment reassembly.
 *
 * Fills in ip6_frags and initialises it, then registers the
 * IPPROTO_FRAGMENT handler, the global sysctl table and the per-namespace
 * operations, in that order.
 *
 * Returns 0 on success. On failure, returns the error of the failing step
 * after undoing the earlier steps in reverse order.
 */
int __init ipv6_frag_init(void)
{
	int ret;

	ip6_frags.constructor = ip6_frag_init;
	ip6_frags.destructor = NULL;
	ip6_frags.qsize = sizeof(struct frag_queue);
	ip6_frags.frag_expire = ip6_frag_expire;
	ip6_frags.frags_cache_name = ip6_frag_cache_name;
	ip6_frags.rhash_params = ip6_rhash_params;
	ret = inet_frags_init(&ip6_frags);
	if (ret)
		goto out;

	ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
	if (ret)
		goto err_protocol;

	ret = ip6_frags_sysctl_register();
	if (ret)
		goto err_sysctl;

	ret = register_pernet_subsys(&ip6_frags_ops);
	if (ret)
		goto err_pernet;

out:
	return ret;

err_pernet:
	ip6_frags_sysctl_unregister();
err_sysctl:
	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
err_protocol:
	inet_frags_fini(&ip6_frags);
	goto out;
}

void ipv6_frag_exit(void)
{
	inet_frags_fini(&ip6_frags);
775
	ip6_frags_sysctl_unregister();
776
	unregister_pernet_subsys(&ip6_frags_ops);
777
	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
L
Linus Torvalds 已提交
778
}