ip_fragment.c 15.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The IP fragmentation functionality.
 *
 * Version:	$Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $
 *
 * Authors:	Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox <Alan.Cox@linux.org>
 *
 * Fixes:
 *		Alan Cox	:	Split from ip.c , see ip_input.c for history.
 *		David S. Miller :	Begin massive cleanup...
 *		Andi Kleen	:	Add sysctls.
 *		xxxx		:	Overlapfrag bug.
 *		Ultima          :       ip_expire() kernel panic.
 *		Bill Hawes	:	Frag accounting and evictor fixes.
 *		John McDonald	:	0 length frag bug.
 *		Alexey Kuznetsov:	SMP races, threading, cleanup.
 *		Patrick McHardy :	LRU queue of frag heads for evictor.
 */

H
Herbert Xu 已提交
25
#include <linux/compiler.h>
L
Linus Torvalds 已提交
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/jiffies.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/netdevice.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/checksum.h>
H
Herbert Xu 已提交
41
#include <net/inetpeer.h>
42
#include <net/inet_frag.h>
L
Linus Torvalds 已提交
43 44 45 46 47 48 49 50 51 52
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/inet.h>
#include <linux/netfilter_ipv4.h>

/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
 * as well. Or notify me, at least. --ANK
 */

53
/*
 * Backing store for the "ipfrag_max_dist" sysctl entry.  A value of zero
 * disables both the per-source peer lookup in ip4_frag_init() and the
 * distance check in ip_frag_too_far().
 */
static int sysctl_ipfrag_max_dist __read_mostly = 64;
H
Herbert Xu 已提交
54

L
Linus Torvalds 已提交
55 56 57 58 59 60 61 62 63 64
/*
 * Private per-skb data stored in skb->cb while a fragment is held on a
 * reassembly queue.
 */
struct ipfrag_skb_cb
{
	struct inet_skb_parm	h;
	/* Byte offset of this fragment's data; written by ip_frag_queue(). */
	int			offset;
};

/* View an skb's control buffer as a struct ipfrag_skb_cb. */
#define FRAG_CB(skb)	((struct ipfrag_skb_cb*)((skb)->cb))

/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
65 66
	struct inet_frag_queue q;

L
Linus Torvalds 已提交
67
	u32		user;
68 69 70
	__be32		saddr;
	__be32		daddr;
	__be16		id;
L
Linus Torvalds 已提交
71
	u8		protocol;
H
Herbert Xu 已提交
72 73 74
	int             iif;
	unsigned int    rid;
	struct inet_peer *peer;
L
Linus Torvalds 已提交
75 76
};

77
/* Tunables for IPv4 reassembly; exposed through ip4_frags_ctl_table. */
static struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
	/*
	 * Fragment cache limits: up to 256K may be committed at one time.
	 * Once that limit is crossed the cache is pruned back down to 192K.
	 * This should cope with even the most extreme cases without letting
	 * an attacker measurably harm machine performance.
	 */
	.high_thresh	 = 256 * 1024,
	.low_thresh	 = 192 * 1024,

	/*
	 * Important NOTE! A fragment queue must be destroyed before MSL
	 * expires.  RFC791 is wrong in proposing to prolong the timer by
	 * TTL on each fragment arrival.
	 */
	.timeout	 = IP_FRAG_TIME,
	.secret_interval = 10 * 60 * HZ,
};

96
/*
 * IPv4 instance of the generic fragment-queue machinery.  Its callbacks and
 * control block are filled in by ipfrag_init(); this file also uses its
 * hash seed (rnd), memory counter (mem), lock and LRU list.
 */
static struct inet_frags ip4_frags;
L
Linus Torvalds 已提交
97

98
int ip_frag_nqueues(struct net *net)
99
{
100
	return net->ipv4.frags.nqueues;
101
}
L
Linus Torvalds 已提交
102

103 104 105 106
/*
 * Return the current value of ip4_frags.mem, the counter this file adjusts
 * by skb->truesize as fragments are queued and released.
 */
int ip_frag_mem(void)
{
	int mem = atomic_read(&ip4_frags.mem);

	return mem;
}
L
Linus Torvalds 已提交
107

108 109 110
/* Forward declaration: ip_frag_queue() calls this before its definition. */
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
			 struct net_device *dev);

111 112 113 114 115
/*
 * Lookup/creation argument built by ip_find() and consumed by
 * ip4_frag_match() and ip4_frag_init().
 */
struct ip4_create_arg {
	struct iphdr *iph;	/* IP header of the fragment being looked up */
	u32 user;		/* caller-supplied user value stored in ipq->user */
};

116
/*
 * Hash the (id, protocol, saddr, daddr) tuple with the ip4_frags.rnd seed
 * and reduce the result to a bucket index below INETFRAGS_HASHSZ.
 */
static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
{
	u32 hash;

	hash = jhash_3words((__force u32)id << 16 | prot,
			    (__force u32)saddr,
			    (__force u32)daddr,
			    ip4_frags.rnd);

	return hash & (INETFRAGS_HASHSZ - 1);
}

123
static unsigned int ip4_hashfn(struct inet_frag_queue *q)
L
Linus Torvalds 已提交
124
{
125
	struct ipq *ipq;
L
Linus Torvalds 已提交
126

127 128
	ipq = container_of(q, struct ipq, q);
	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
L
Linus Torvalds 已提交
129 130
}

131 132 133 134 135 136 137 138 139 140 141 142 143
/*
 * inet_frags match callback.
 *
 * @q: candidate queue.
 * @a: struct ip4_create_arg describing the fragment being looked up.
 *
 * Returns 1 when id, source, destination, protocol and user all agree,
 * 0 otherwise.
 */
static int ip4_frag_match(struct inet_frag_queue *q, void *a)
{
	struct ip4_create_arg *arg = a;
	struct ipq *qp = container_of(q, struct ipq, q);

	if (qp->id != arg->iph->id)
		return 0;
	if (qp->saddr != arg->iph->saddr)
		return 0;
	if (qp->daddr != arg->iph->daddr)
		return 0;
	if (qp->protocol != arg->iph->protocol)
		return 0;

	return qp->user == arg->user;
}

L
Linus Torvalds 已提交
144 145 146 147 148
/* Memory Tracking Functions. */
static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
{
	if (work)
		*work -= skb->truesize;
149
	atomic_sub(skb->truesize, &ip4_frags.mem);
L
Linus Torvalds 已提交
150 151 152
	kfree_skb(skb);
}

153 154 155 156 157 158 159 160 161 162 163 164 165 166
/*
 * inet_frags constructor callback: copy the lookup key from @a (a struct
 * ip4_create_arg) into the new queue and, unless ipfrag_max_dist is zero,
 * attach the inet_peer entry for the source address.
 */
static void ip4_frag_init(struct inet_frag_queue *q, void *a)
{
	struct ip4_create_arg *arg = a;
	struct ipq *qp = container_of(q, struct ipq, q);

	qp->protocol = arg->iph->protocol;
	qp->id = arg->iph->id;
	qp->saddr = arg->iph->saddr;
	qp->daddr = arg->iph->daddr;
	qp->user = arg->user;

	if (sysctl_ipfrag_max_dist)
		qp->peer = inet_getpeer(arg->iph->saddr, 1);
	else
		qp->peer = NULL;
}

167
static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
L
Linus Torvalds 已提交
168
{
169 170 171 172 173
	struct ipq *qp;

	qp = container_of(q, struct ipq, q);
	if (qp->peer)
		inet_putpeer(qp->peer);
L
Linus Torvalds 已提交
174 175 176 177 178
}


/* Destruction primitives. */

179
static __inline__ void ipq_put(struct ipq *ipq)
L
Linus Torvalds 已提交
180
{
P
Pavel Emelyanov 已提交
181
	inet_frag_put(&ipq->q, &ip4_frags);
L
Linus Torvalds 已提交
182 183 184 185 186 187 188
}

/* Kill ipq entry. It is not destroyed immediately,
 * because caller (and someone more) holds reference count.
 */
static void ipq_kill(struct ipq *ipq)
{
189
	inet_frag_kill(&ipq->q, &ip4_frags);
L
Linus Torvalds 已提交
190 191
}

192
/*
 * Memory limiting on fragments.  The evictor trashes the oldest fragment
 * queue until we are back under the threshold; the count it returns is
 * added to the REASMFAILS statistic.
 */
static void ip_evictor(void)
{
	int evicted = inet_frag_evictor(&ip4_frags);

	if (!evicted)
		return;

	IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
}

/*
 * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
 */
static void ip_expire(unsigned long arg)
{
209 210 211
	struct ipq *qp;

	qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
L
Linus Torvalds 已提交
212

213
	spin_lock(&qp->q.lock);
L
Linus Torvalds 已提交
214

215
	if (qp->q.last_in & COMPLETE)
L
Linus Torvalds 已提交
216 217 218 219 220 221 222
		goto out;

	ipq_kill(qp);

	IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);

223 224
	if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) {
		struct sk_buff *head = qp->q.fragments;
L
Linus Torvalds 已提交
225
		/* Send an ICMP "Fragment Reassembly Timeout" message. */
226
		if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) {
L
Linus Torvalds 已提交
227 228 229 230 231
			icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
			dev_put(head->dev);
		}
	}
out:
232
	spin_unlock(&qp->q.lock);
233
	ipq_put(qp);
L
Linus Torvalds 已提交
234 235
}

236 237 238
/* Find the correct entry in the "incomplete datagrams" queue for
 * this IP datagram, and create new one, if nothing is found.
 */
239
static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
L
Linus Torvalds 已提交
240
{
241 242
	struct inet_frag_queue *q;
	struct ip4_create_arg arg;
243
	unsigned int hash;
L
Linus Torvalds 已提交
244

245 246
	arg.iph = iph;
	arg.user = user;
247
	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
L
Linus Torvalds 已提交
248

249
	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
250 251
	if (q == NULL)
		goto out_nomem;
L
Linus Torvalds 已提交
252

253
	return container_of(q, struct ipq, q);
L
Linus Torvalds 已提交
254 255

out_nomem:
256
	LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
L
Linus Torvalds 已提交
257 258 259
	return NULL;
}

H
Herbert Xu 已提交
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
/* Is the fragment too far ahead to be part of ipq? */
static inline int ip_frag_too_far(struct ipq *qp)
{
	struct inet_peer *peer = qp->peer;
	unsigned int max = sysctl_ipfrag_max_dist;
	unsigned int start, end;

	int rc;

	if (!peer || !max)
		return 0;

	start = qp->rid;
	end = atomic_inc_return(&peer->rid);
	qp->rid = end;

276
	rc = qp->q.fragments && (end - start) > max;
H
Herbert Xu 已提交
277 278 279 280 281 282 283 284 285 286 287 288

	if (rc) {
		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
	}

	return rc;
}

static int ip_frag_reinit(struct ipq *qp)
{
	struct sk_buff *fp;

289
	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) {
290
		atomic_inc(&qp->q.refcnt);
H
Herbert Xu 已提交
291 292 293
		return -ETIMEDOUT;
	}

294
	fp = qp->q.fragments;
H
Herbert Xu 已提交
295 296 297 298 299 300
	do {
		struct sk_buff *xp = fp->next;
		frag_kfree_skb(fp, NULL);
		fp = xp;
	} while (fp);

301 302 303 304
	qp->q.last_in = 0;
	qp->q.len = 0;
	qp->q.meat = 0;
	qp->q.fragments = NULL;
H
Herbert Xu 已提交
305 306 307 308 309
	qp->iif = 0;

	return 0;
}

L
Linus Torvalds 已提交
310
/* Add new segment to existing queue. */
311
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
L
Linus Torvalds 已提交
312 313
{
	struct sk_buff *prev, *next;
314
	struct net_device *dev;
L
Linus Torvalds 已提交
315 316
	int flags, offset;
	int ihl, end;
317
	int err = -ENOENT;
L
Linus Torvalds 已提交
318

319
	if (qp->q.last_in & COMPLETE)
L
Linus Torvalds 已提交
320 321
		goto err;

H
Herbert Xu 已提交
322
	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
323 324
	    unlikely(ip_frag_too_far(qp)) &&
	    unlikely(err = ip_frag_reinit(qp))) {
H
Herbert Xu 已提交
325 326 327 328
		ipq_kill(qp);
		goto err;
	}

329
	offset = ntohs(ip_hdr(skb)->frag_off);
L
Linus Torvalds 已提交
330 331 332
	flags = offset & ~IP_OFFSET;
	offset &= IP_OFFSET;
	offset <<= 3;		/* offset is in 8-byte chunks */
333
	ihl = ip_hdrlen(skb);
L
Linus Torvalds 已提交
334 335

	/* Determine the position of this fragment. */
336
	end = offset + skb->len - ihl;
337
	err = -EINVAL;
L
Linus Torvalds 已提交
338 339 340 341 342 343

	/* Is this the final fragment? */
	if ((flags & IP_MF) == 0) {
		/* If we already have some bits beyond end
		 * or have different end, the segment is corrrupted.
		 */
344 345
		if (end < qp->q.len ||
		    ((qp->q.last_in & LAST_IN) && end != qp->q.len))
L
Linus Torvalds 已提交
346
			goto err;
347 348
		qp->q.last_in |= LAST_IN;
		qp->q.len = end;
L
Linus Torvalds 已提交
349 350 351 352 353 354
	} else {
		if (end&7) {
			end &= ~7;
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
355
		if (end > qp->q.len) {
L
Linus Torvalds 已提交
356
			/* Some bits beyond end -> corruption. */
357
			if (qp->q.last_in & LAST_IN)
L
Linus Torvalds 已提交
358
				goto err;
359
			qp->q.len = end;
L
Linus Torvalds 已提交
360 361 362 363 364
		}
	}
	if (end == offset)
		goto err;

365
	err = -ENOMEM;
L
Linus Torvalds 已提交
366 367
	if (pskb_pull(skb, ihl) == NULL)
		goto err;
368 369 370

	err = pskb_trim_rcsum(skb, end - offset);
	if (err)
L
Linus Torvalds 已提交
371 372 373 374 375 376 377
		goto err;

	/* Find out which fragments are in front and at the back of us
	 * in the chain of fragments so far.  We must know where to put
	 * this fragment, right?
	 */
	prev = NULL;
378
	for (next = qp->q.fragments; next != NULL; next = next->next) {
L
Linus Torvalds 已提交
379 380 381 382 383 384 385 386 387 388 389 390 391 392
		if (FRAG_CB(next)->offset >= offset)
			break;	/* bingo! */
		prev = next;
	}

	/* We found where to put this one.  Check for overlap with
	 * preceding fragment, and, if needed, align things so that
	 * any overlaps are eliminated.
	 */
	if (prev) {
		int i = (FRAG_CB(prev)->offset + prev->len) - offset;

		if (i > 0) {
			offset += i;
393
			err = -EINVAL;
L
Linus Torvalds 已提交
394 395
			if (end <= offset)
				goto err;
396
			err = -ENOMEM;
L
Linus Torvalds 已提交
397 398 399 400 401 402 403
			if (!pskb_pull(skb, i))
				goto err;
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
	}

404 405
	err = -ENOMEM;

L
Linus Torvalds 已提交
406 407 408 409 410 411 412 413 414 415
	while (next && FRAG_CB(next)->offset < end) {
		int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */

		if (i < next->len) {
			/* Eat head of the next overlapped fragment
			 * and leave the loop. The next ones cannot overlap.
			 */
			if (!pskb_pull(next, i))
				goto err;
			FRAG_CB(next)->offset += i;
416
			qp->q.meat -= i;
L
Linus Torvalds 已提交
417 418 419 420 421 422
			if (next->ip_summed != CHECKSUM_UNNECESSARY)
				next->ip_summed = CHECKSUM_NONE;
			break;
		} else {
			struct sk_buff *free_it = next;

423
			/* Old fragment is completely overridden with
L
Linus Torvalds 已提交
424 425 426 427 428 429 430
			 * new one drop it.
			 */
			next = next->next;

			if (prev)
				prev->next = next;
			else
431
				qp->q.fragments = next;
L
Linus Torvalds 已提交
432

433
			qp->q.meat -= free_it->len;
L
Linus Torvalds 已提交
434 435 436 437 438 439 440 441 442 443 444
			frag_kfree_skb(free_it, NULL);
		}
	}

	FRAG_CB(skb)->offset = offset;

	/* Insert this fragment in the chain of fragments. */
	skb->next = next;
	if (prev)
		prev->next = skb;
	else
445
		qp->q.fragments = skb;
L
Linus Torvalds 已提交
446

447 448 449 450 451
	dev = skb->dev;
	if (dev) {
		qp->iif = dev->ifindex;
		skb->dev = NULL;
	}
452 453
	qp->q.stamp = skb->tstamp;
	qp->q.meat += skb->len;
454
	atomic_add(skb->truesize, &ip4_frags.mem);
L
Linus Torvalds 已提交
455
	if (offset == 0)
456
		qp->q.last_in |= FIRST_IN;
L
Linus Torvalds 已提交
457

458
	if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len)
459 460
		return ip_frag_reasm(qp, prev, dev);

461 462 463
	write_lock(&ip4_frags.lock);
	list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list);
	write_unlock(&ip4_frags.lock);
464
	return -EINPROGRESS;
L
Linus Torvalds 已提交
465 466 467

err:
	kfree_skb(skb);
468
	return err;
L
Linus Torvalds 已提交
469 470 471 472 473
}


/* Build a new IP datagram from all its fragments. */

474 475
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
			 struct net_device *dev)
L
Linus Torvalds 已提交
476 477
{
	struct iphdr *iph;
478
	struct sk_buff *fp, *head = qp->q.fragments;
L
Linus Torvalds 已提交
479 480
	int len;
	int ihlen;
481
	int err;
L
Linus Torvalds 已提交
482 483 484

	ipq_kill(qp);

485 486 487 488 489 490 491 492 493 494
	/* Make the one we just received the head. */
	if (prev) {
		head = prev->next;
		fp = skb_clone(head, GFP_ATOMIC);
		if (!fp)
			goto out_nomem;

		fp->next = head->next;
		prev->next = fp;

495 496
		skb_morph(head, qp->q.fragments);
		head->next = qp->q.fragments->next;
497

498 499
		kfree_skb(qp->q.fragments);
		qp->q.fragments = head;
500 501
	}

L
Linus Torvalds 已提交
502 503 504 505
	BUG_TRAP(head != NULL);
	BUG_TRAP(FRAG_CB(head)->offset == 0);

	/* Allocate a new buffer for the datagram. */
506
	ihlen = ip_hdrlen(head);
507
	len = ihlen + qp->q.len;
L
Linus Torvalds 已提交
508

509
	err = -E2BIG;
S
Stephen Hemminger 已提交
510
	if (len > 65535)
L
Linus Torvalds 已提交
511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
		goto out_oversize;

	/* Head of list must not be cloned. */
	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
		goto out_nomem;

	/* If the first fragment is fragmented itself, we split
	 * it to two chunks: the first with data and paged part
	 * and the second, holding only fragments. */
	if (skb_shinfo(head)->frag_list) {
		struct sk_buff *clone;
		int i, plen = 0;

		if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
			goto out_nomem;
		clone->next = head->next;
		head->next = clone;
		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
		skb_shinfo(head)->frag_list = NULL;
		for (i=0; i<skb_shinfo(head)->nr_frags; i++)
			plen += skb_shinfo(head)->frags[i].size;
		clone->len = clone->data_len = head->data_len - plen;
		head->data_len -= clone->len;
		head->len -= clone->len;
		clone->csum = 0;
		clone->ip_summed = head->ip_summed;
537
		atomic_add(clone->truesize, &ip4_frags.mem);
L
Linus Torvalds 已提交
538 539 540
	}

	skb_shinfo(head)->frag_list = head->next;
541
	skb_push(head, head->data - skb_network_header(head));
542
	atomic_sub(head->truesize, &ip4_frags.mem);
L
Linus Torvalds 已提交
543 544 545 546 547 548

	for (fp=head->next; fp; fp = fp->next) {
		head->data_len += fp->len;
		head->len += fp->len;
		if (head->ip_summed != fp->ip_summed)
			head->ip_summed = CHECKSUM_NONE;
549
		else if (head->ip_summed == CHECKSUM_COMPLETE)
L
Linus Torvalds 已提交
550 551
			head->csum = csum_add(head->csum, fp->csum);
		head->truesize += fp->truesize;
552
		atomic_sub(fp->truesize, &ip4_frags.mem);
L
Linus Torvalds 已提交
553 554 555 556
	}

	head->next = NULL;
	head->dev = dev;
557
	head->tstamp = qp->q.stamp;
L
Linus Torvalds 已提交
558

559
	iph = ip_hdr(head);
L
Linus Torvalds 已提交
560 561 562
	iph->frag_off = 0;
	iph->tot_len = htons(len);
	IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
563
	qp->q.fragments = NULL;
564
	return 0;
L
Linus Torvalds 已提交
565 566

out_nomem:
567
	LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
568
			      "queue %p\n", qp);
569
	err = -ENOMEM;
L
Linus Torvalds 已提交
570 571 572 573 574 575 576 577
	goto out_fail;
out_oversize:
	if (net_ratelimit())
		printk(KERN_INFO
			"Oversized IP packet from %d.%d.%d.%d.\n",
			NIPQUAD(qp->saddr));
out_fail:
	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
578
	return err;
L
Linus Torvalds 已提交
579 580 581
}

/* Process an incoming IP datagram fragment. */
582
int ip_defrag(struct sk_buff *skb, u32 user)
L
Linus Torvalds 已提交
583 584
{
	struct ipq *qp;
585
	struct net *net;
586

L
Linus Torvalds 已提交
587 588
	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);

589
	net = skb->dev->nd_net;
L
Linus Torvalds 已提交
590
	/* Start by cleaning up the memory. */
591
	if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
L
Linus Torvalds 已提交
592 593 594
		ip_evictor();

	/* Lookup (or create) queue header */
595
	if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
596
		int ret;
L
Linus Torvalds 已提交
597

598
		spin_lock(&qp->q.lock);
L
Linus Torvalds 已提交
599

600
		ret = ip_frag_queue(qp, skb);
L
Linus Torvalds 已提交
601

602
		spin_unlock(&qp->q.lock);
603
		ipq_put(qp);
604
		return ret;
L
Linus Torvalds 已提交
605 606 607 608
	}

	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
	kfree_skb(skb);
609
	return -ENOMEM;
L
Linus Torvalds 已提交
610 611
}

612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677
#ifdef CONFIG_SYSCTL
/* Lower bound (extra1) for the "ipfrag_max_dist" entry below. */
static int zero;

/*
 * Sysctl entries for the IPv4 fragment tunables; registered by
 * ip4_frags_ctl_register().
 */
static struct ctl_table ip4_frags_ctl_table[] = {
	/* Backed by ip4_frags_ctl.high_thresh. */
	{
		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
		.procname	= "ipfrag_high_thresh",
		.data		= &ip4_frags_ctl.high_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec
	},
	/* Backed by ip4_frags_ctl.low_thresh. */
	{
		.ctl_name	= NET_IPV4_IPFRAG_LOW_THRESH,
		.procname	= "ipfrag_low_thresh",
		.data		= &ip4_frags_ctl.low_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec
	},
	/* Backed by ip4_frags_ctl.timeout; uses the jiffies handlers. */
	{
		.ctl_name	= NET_IPV4_IPFRAG_TIME,
		.procname	= "ipfrag_time",
		.data		= &ip4_frags_ctl.timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies
	},
	/* Backed by ip4_frags_ctl.secret_interval; uses the jiffies handlers. */
	{
		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
		.procname	= "ipfrag_secret_interval",
		.data		= &ip4_frags_ctl.secret_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies
	},
	/* Backed by sysctl_ipfrag_max_dist; no ctl_name, minimum is zero. */
	{
		.procname	= "ipfrag_max_dist",
		.data		= &sysctl_ipfrag_max_dist,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.extra1		= &zero
	},
	/* Empty terminating entry. */
	{ }
};

/*
 * Register ip4_frags_ctl_table for @net under net_ipv4_ctl_path.
 * Returns 0 on success, -ENOMEM when registration returns NULL.
 */
static int ip4_frags_ctl_register(struct net *net)
{
	struct ctl_table_header *hdr;

	hdr = register_net_sysctl_table(net, net_ipv4_ctl_path,
					ip4_frags_ctl_table);
	if (hdr == NULL)
		return -ENOMEM;

	return 0;
}
#else
/* Without CONFIG_SYSCTL there is nothing to register; always succeeds. */
static inline int ip4_frags_ctl_register(struct net *net)
{
	return 0;
}
#endif

static int ipv4_frags_init_net(struct net *net)
{
678 679
	inet_frags_init_net(&net->ipv4.frags);

680 681 682
	return ip4_frags_ctl_register(net);
}

683
/*
 * Boot-time setup: initialise init_net's fragment state, fill in the
 * IPv4 callbacks and control block of ip4_frags, then hand it to the
 * generic inet_frags_init().  The return value of ipv4_frags_init_net()
 * is not checked.
 */
void __init ipfrag_init(void)
{
	ipv4_frags_init_net(&init_net);

	/* Control block and queue size. */
	ip4_frags.ctl		= &ip4_frags_ctl;
	ip4_frags.qsize		= sizeof(struct ipq);

	/* Callbacks. */
	ip4_frags.hashfn	= ip4_hashfn;
	ip4_frags.match		= ip4_frag_match;
	ip4_frags.constructor	= ip4_frag_init;
	ip4_frags.destructor	= ip4_frag_free;
	ip4_frags.skb_free	= NULL;
	ip4_frags.frag_expire	= ip_expire;

	inet_frags_init(&ip4_frags);
}

EXPORT_SYMBOL(ip_defrag);