nf_conntrack_proto_tcp.c 50.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/spinlock.h>
#include <linux/skbuff.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
18
#include <asm/unaligned.h>
19 20 21 22 23 24 25

#include <net/tcp.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack.h>
26
#include <net/netfilter/nf_conntrack_l4proto.h>
27
#include <net/netfilter/nf_conntrack_ecache.h>
28
#include <net/netfilter/nf_log.h>
29 30
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
31

32 33
/* "Be conservative in what you do,
    be liberal in what you accept from others."
34
    If it's non-zero, we mark only out of window RST segments as INVALID. */
35
static int nf_ct_tcp_be_liberal __read_mostly = 0;
36

37
/* If it is set to zero, we disable picking up already established
38
   connections. */
39
static int nf_ct_tcp_loose __read_mostly = 1;
40

41 42
/* Max number of the retransmitted packets without receiving an (acceptable)
   ACK from the destination. If this number is reached, a shorter timer
43
   will be started. */
44
static int nf_ct_tcp_max_retrans __read_mostly = 3;
45 46 47 48

  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
     closely.  They're more complex. --RR */

49
/*
 * Printable names of the conntrack TCP states.  tcp_print_conntrack()
 * indexes this table with the connection's current state value.
 */
static const char *const tcp_conntrack_names[] = {
	"NONE",
	"SYN_SENT",
	"SYN_RECV",
	"ESTABLISHED",
	"FIN_WAIT",
	"CLOSE_WAIT",
	"LAST_ACK",
	"TIME_WAIT",
	"CLOSE",
	"SYN_SENT2",
};
61

62 63 64 65 66
/*
 * Postfix time-unit macros.  Written after a number, each one expands to a
 * multiplication chain ending in HZ, e.g. "2 MINS" becomes "2 * 60 * HZ".
 * They are only meaningful as a suffix to a numeric expression.
 */
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS

67
/* Default timeout per conntrack TCP state, expressed via the unit macros. */
static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
	[TCP_CONNTRACK_SYN_RECV]	= 60 SECS,
	[TCP_CONNTRACK_ESTABLISHED]	= 5 DAYS,
	[TCP_CONNTRACK_FIN_WAIT]	= 2 MINS,
	[TCP_CONNTRACK_CLOSE_WAIT]	= 60 SECS,
	[TCP_CONNTRACK_LAST_ACK]	= 30 SECS,
	[TCP_CONNTRACK_TIME_WAIT]	= 2 MINS,
	[TCP_CONNTRACK_CLOSE]		= 10 SECS,
	[TCP_CONNTRACK_SYN_SENT2]	= 2 MINS,
/* RFC1122 says the R2 limit should be at least 100 seconds.
   Linux uses 15 packets as limit, which corresponds
   to ~13-30min depending on RTO. */
	[TCP_CONNTRACK_RETRANS]		= 5 MINS,
	[TCP_CONNTRACK_UNACK]		= 5 MINS,
};
83

84 85 86 87 88 89 90 91 92
/* Short aliases for the conntrack TCP states, used in the transition table. */
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sS2 TCP_CONNTRACK_SYN_SENT2
/* Pseudo-states: sIV marks an invalid packet, sIG a packet to be ignored. */
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE

/*
 * Classification of a segment according to which of the RST/SYN/FIN/ACK
 * flags it carries.  The value is used as the middle index of the state
 * transition table, so the numbering is spelled out explicitly.
 */
enum tcp_bit_set {
	TCP_SYN_SET	= 0,
	TCP_SYNACK_SET	= 1,
	TCP_FIN_SET	= 2,
	TCP_ACK_SET	= 3,
	TCP_RST_SET	= 4,
	TCP_NONE_SET	= 5,
};
106

107 108 109 110 111
/*
 * The TCP state transition table needs a few words...
 *
 * We are the man in the middle. All the packets go through us
 * but might get lost in transit to the destination.
112
 * It is assumed that the destinations can't receive segments
113 114 115 116 117 118 119 120 121
 * we haven't seen.
 *
 * The checked segment is in window, but our windows are *not*
 * equivalent with the ones of the sender/receiver. We always
 * try to guess the state of the current sender.
 *
 * The meanings of the states are:
 *
 * NONE:	initial state
122
 * SYN_SENT:	SYN-only packet seen
123
 * SYN_SENT2:	SYN-only packet seen from reply dir, simultaneous open
124 125 126
 * SYN_RECV:	SYN-ACK packet seen
 * ESTABLISHED:	ACK packet seen
 * FIN_WAIT:	FIN packet seen
127
 * CLOSE_WAIT:	ACK seen (after FIN)
128 129
 * LAST_ACK:	FIN seen (after FIN)
 * TIME_WAIT:	last ACK seen
130
 * CLOSE:	closed connection (RST)
131 132
 *
 * Packets marked as IGNORED (sIG):
133 134
 *	if they may be either invalid or valid
 *	and the receiver may send back a connection
135 136 137
 *	closing RST or a SYN/ACK.
 *
 * Packets marked as INVALID (sIV):
138
 *	if we regard them as truly invalid packets
139
 */
140
static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
141 142
	{
/* ORIGINAL */
143 144
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
145 146 147
/*
 *	sNO -> sSS	Initialize a new connection
 *	sSS -> sSS	Retransmitted SYN
148 149
 *	sS2 -> sS2	Late retransmitted SYN
 *	sSR -> sIG
150
 *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
151
 *			are errors. Receiver will reply with RST
152 153 154 155 156 157 158 159
 *			and close the connection.
 *			Or we are not in sync and hold a dead connection.
 *	sFW -> sIG
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sSS	Reopened connection (RFC 1122).
 *	sCL -> sSS
 */
160
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
161
/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
162
/*
163 164 165
 *	sNO -> sIV	Too late and no reason to do anything
 *	sSS -> sIV	Client can't send SYN and then SYN/ACK
 *	sS2 -> sSR	SYN/ACK sent to SYN2 in simultaneous open
166 167 168 169 170 171 172
 *	sSR -> sSR	Late retransmitted SYN/ACK in simultaneous open
 *	sES -> sIV	Invalid SYN/ACK packets sent by the client
 *	sFW -> sIV
 *	sCW -> sIV
 *	sLA -> sIV
 *	sTW -> sIV
 *	sCL -> sIV
173
 */
174
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
175 176 177 178 179
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sNO -> sIV	Too late and no reason to do anything...
 *	sSS -> sIV	Client migth not send FIN in this state:
 *			we enforce waiting for a SYN/ACK reply first.
180
 *	sS2 -> sIV
181 182 183
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions, waiting for
184
 *			the last ACK.
185 186 187 188 189 190
 *			Migth be a retransmitted FIN as well...
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
 *	sTW -> sTW
 *	sCL -> sCL
 */
191
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
192 193 194 195
/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
 *	sNO -> sES	Assumed.
 *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
196
 *	sS2 -> sIV
197 198 199 200 201 202 203 204
 *	sSR -> sES	Established state is reached.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected.
 *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
 *	sCL -> sCL
 */
205 206
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
207 208 209 210
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	},
	{
/* REPLY */
211 212
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
213 214
/*
 *	sNO -> sIV	Never reached.
215 216 217 218
 *	sSS -> sS2	Simultaneous open
 *	sS2 -> sS2	Retransmitted simultaneous SYN
 *	sSR -> sIV	Invalid SYN packets sent by the server
 *	sES -> sIV
219 220 221 222 223 224
 *	sFW -> sIV
 *	sCW -> sIV
 *	sLA -> sIV
 *	sTW -> sIV	Reopened connection, but server may not do it.
 *	sCL -> sIV
 */
225
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
226
/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
227 228
/*
 *	sSS -> sSR	Standard open.
229
 *	sS2 -> sSR	Simultaneous open
230
 *	sSR -> sIG	Retransmitted SYN/ACK, ignore it.
231 232 233 234 235 236 237
 *	sES -> sIG	Late retransmitted SYN/ACK?
 *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sIG
 *	sCL -> sIG
 */
238
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
239 240 241
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sSS -> sIV	Server might not send FIN in this state.
242
 *	sS2 -> sIV
243 244 245 246 247 248 249 250
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions.
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN.
 *	sTW -> sTW
 *	sCL -> sCL
 */
251 252
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
253
/*
254
 *	sSS -> sIG	Might be a half-open connection.
255
 *	sS2 -> sIG
256 257 258 259 260 261 262 263
 *	sSR -> sSR	Might answer late resent SYN.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected.
 *	sTW -> sTW	Retransmitted last ACK.
 *	sCL -> sCL
 */
264 265
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
266
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
267
	}
268 269
};

270 271 272 273 274
/* Return the TCP conntrack data embedded in the given network namespace. */
static inline struct nf_tcp_net *tcp_pernet(struct net *net)
{
	struct nf_tcp_net *tn = &net->ct.nf_ct_proto.tcp;

	return tn;
}

275 276
static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
			     struct nf_conntrack_tuple *tuple)
277
{
278 279
	const struct tcphdr *hp;
	struct tcphdr _hdr;
280 281 282 283

	/* Actually only need first 8 bytes. */
	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
	if (hp == NULL)
284
		return false;
285 286 287 288

	tuple->src.u.tcp.port = hp->source;
	tuple->dst.u.tcp.port = hp->dest;

289
	return true;
290 291
}

292 293
static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
			     const struct nf_conntrack_tuple *orig)
294 295 296
{
	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
297
	return true;
298 299 300 301 302 303 304 305 306 307 308 309
}

/*
 * Emit the TCP-specific part of a tuple (source and destination port, in
 * host byte order) to the seq_file and hand back seq_printf()'s result.
 */
static int tcp_print_tuple(struct seq_file *s,
			   const struct nf_conntrack_tuple *tuple)
{
	const unsigned short sport = ntohs(tuple->src.u.tcp.port);
	const unsigned short dport = ntohs(tuple->dst.u.tcp.port);

	return seq_printf(s, "sport=%hu dport=%hu ", sport, dport);
}

/* Print out the private part of the conntrack. */
310
static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
311 312 313
{
	enum tcp_conntrack state;

314
	spin_lock_bh(&ct->lock);
315
	state = ct->proto.tcp.state;
316
	spin_unlock_bh(&ct->lock);
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331

	return seq_printf(s, "%s ", tcp_conntrack_names[state]);
}

/*
 * Map the flags of a TCP header to an enum tcp_bit_set value.
 * Precedence: RST first, then SYN (SYN/ACK when ACK is also set),
 * then FIN, then plain ACK; anything else is TCP_NONE_SET.
 */
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
{
	if (tcph->rst)
		return TCP_RST_SET;

	if (tcph->syn) {
		if (tcph->ack)
			return TCP_SYNACK_SET;
		return TCP_SYN_SET;
	}

	if (tcph->fin)
		return TCP_FIN_SET;

	if (tcph->ack)
		return TCP_ACK_SET;

	return TCP_NONE_SET;
}

/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
   in IP Filter' by Guido van Rooij.
332

333 334
   http://www.sane.nl/events/sane2000/papers.html
   http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
335

336 337 338 339 340
   The boundaries and the conditions are changed according to RFC793:
   the packet must intersect the window (i.e. segments may be
   after the right or before the left edge) and thus receivers may ACK
   segments after the right edge of the window.

341
	td_maxend = max(sack + max(win,1)) seen in reply packets
342 343 344 345
	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
	td_maxwin += seq + len - sender.td_maxend
			if seq + len > sender.td_maxend
	td_end    = max(seq + len) seen in sent packets
346

347 348
   I.   Upper bound for valid data:	seq <= sender.td_maxend
   II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
349 350
   III.	Upper bound for valid (s)ack:   sack <= receiver.td_end
   IV.	Lower bound for valid (s)ack:	sack >= receiver.td_end - MAXACKWINDOW
351

352 353
   where sack is the highest right edge of sack block found in the packet
   or ack in the case of packet without SACK option.
354

355
   The upper bound limit for a valid (s)ack is not ignored -
356
   we don't have to deal with fragments.
357 358 359 360 361
*/

/*
 * Compute the sequence number just past the segment: @seq plus the payload
 * length (@len minus @dataoff minus the TCP header length taken from doff),
 * plus one for each of SYN and FIN if set.
 */
static inline __u32 segment_seq_plus_len(__u32 seq,
					 size_t len,
					 unsigned int dataoff,
					 const struct tcphdr *tcph)
{
	/* XXX Should I use payload length field in IP/IPv6 header ?
	 * - YK */
	return (seq + len - dataoff - tcph->doff*4
		+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
}
369

370 371 372 373 374
/* Fixme: what about big packets? */
/*
 * MAXACKWINDOW(sender) yields the larger of (sender)->td_maxwin and
 * MAXACKWINCONST.  Note that the argument is evaluated more than once.
 */
#define MAXACKWINCONST			66000
#define MAXACKWINDOW(sender)						\
	(MAXACKWINCONST < (sender)->td_maxwin ? (sender)->td_maxwin	\
					      : MAXACKWINCONST)
375

376 377 378 379 380
/*
 * Simplified tcp_parse_options routine from tcp_input.c
 */
static void tcp_options(const struct sk_buff *skb,
			unsigned int dataoff,
381
			const struct tcphdr *tcph,
382 383 384
			struct ip_ct_tcp_state *state)
{
	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
385
	const unsigned char *ptr;
386 387 388 389 390 391 392 393 394
	int length = (tcph->doff*4) - sizeof(struct tcphdr);

	if (!length)
		return;

	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
				 length, buff);
	BUG_ON(ptr == NULL);

395
	state->td_scale =
396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412
	state->flags = 0;

	while (length > 0) {
		int opcode=*ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			opsize=*ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			if (opsize > length)
413
				return;	/* don't parse partial options */
414

415
			if (opcode == TCPOPT_SACK_PERM
416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435
			    && opsize == TCPOLEN_SACK_PERM)
				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
			else if (opcode == TCPOPT_WINDOW
				 && opsize == TCPOLEN_WINDOW) {
				state->td_scale = *(u_int8_t *)ptr;

				if (state->td_scale > 14) {
					/* See RFC1323 */
					state->td_scale = 14;
				}
				state->flags |=
					IP_CT_TCP_FLAG_WINDOW_SCALE;
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}

/*
 * Scan the TCP options for a SACK option and raise *@sack to the highest
 * right edge found among its blocks, if that edge is after the current
 * value of *@sack.  Parsing stops after the first well-formed SACK option.
 * *@sack is left untouched when no such option is present.
 */
static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
		     const struct tcphdr *tcph, __u32 *sack)
{
	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
	const unsigned char *ptr;
	int length = (tcph->doff*4) - sizeof(struct tcphdr);
	__u32 tmp;

	if (!length)
		return;

	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
				 length, buff);
	BUG_ON(ptr == NULL);

	/* Fast path for timestamp-only option */
	if (length == TCPOLEN_TSTAMP_ALIGNED
	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
				       | (TCPOPT_NOP << 16)
				       | (TCPOPT_TIMESTAMP << 8)
				       | TCPOLEN_TIMESTAMP))
		return;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize, i;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			/*
			 * A multi-byte option needs at least a kind and a
			 * length byte; without this check the length byte
			 * would be read past the end of the option area.
			 */
			if (length < 2)
				return;
			opsize = *ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			if (opsize > length)
				return;	/* don't parse partial options */

			if (opcode == TCPOPT_SACK
			    && opsize >= (TCPOLEN_SACK_BASE
					  + TCPOLEN_SACK_PERBLOCK)
			    && !((opsize - TCPOLEN_SACK_BASE)
				 % TCPOLEN_SACK_PERBLOCK)) {
				for (i = 0;
				     i < (opsize - TCPOLEN_SACK_BASE);
				     i += TCPOLEN_SACK_PERBLOCK) {
					/* second 32-bit word of the block */
					tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);

					if (after(tmp, *sack))
						*sack = tmp;
				}
				return;
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}

496 497 498 499 500 501 502 503 504
#ifdef CONFIG_NF_NAT_NEEDED
/*
 * Fetch the nf_ct_nat_offset hook via rcu_dereference() and, if one is
 * registered, return its result for (ct, dir, seq); otherwise return 0.
 */
static inline s16 nat_offset(const struct nf_conn *ct,
			     enum ip_conntrack_dir dir,
			     u32 seq)
{
	typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);

	return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
}
#define NAT_OFFSET(ct, dir, seq) \
	(nat_offset(ct, dir, seq))
#else
/* Without CONFIG_NF_NAT_NEEDED the offset is always zero. */
#define NAT_OFFSET(ct, dir, seq)	0
#endif

511 512 513 514 515 516 517
static bool tcp_in_window(const struct nf_conn *ct,
			  struct ip_ct_tcp *state,
			  enum ip_conntrack_dir dir,
			  unsigned int index,
			  const struct sk_buff *skb,
			  unsigned int dataoff,
			  const struct tcphdr *tcph,
518
			  u_int8_t pf)
519
{
520
	struct net *net = nf_ct_net(ct);
521
	struct nf_tcp_net *tn = tcp_pernet(net);
522 523
	struct ip_ct_tcp_state *sender = &state->seen[dir];
	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
524
	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
525
	__u32 seq, ack, sack, end, win, swin;
526
	s16 receiver_offset;
527
	bool res;
528 529 530 531 532 533 534 535 536 537 538 539

	/*
	 * Get the required data from the packet.
	 */
	seq = ntohl(tcph->seq);
	ack = sack = ntohl(tcph->ack_seq);
	win = ntohs(tcph->window);
	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);

	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
		tcp_sack(skb, dataoff, tcph, &sack);

540
	/* Take into account NAT sequence number mangling */
541
	receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
542 543 544
	ack -= receiver_offset;
	sack -= receiver_offset;

545 546
	pr_debug("tcp_in_window: START\n");
	pr_debug("tcp_in_window: ");
547
	nf_ct_dump_tuple(tuple);
548 549
	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
550 551 552 553 554 555
	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);
556

557
	if (sender->td_maxwin == 0) {
558 559 560
		/*
		 * Initialize sender data.
		 */
561
		if (tcph->syn) {
562
			/*
563 564
			 * SYN-ACK in reply to a SYN
			 * or SYN from reply direction in simultaneous open.
565
			 */
566
			sender->td_end =
567 568 569 570
			sender->td_maxend = end;
			sender->td_maxwin = (win == 0 ? 1 : win);

			tcp_options(skb, dataoff, tcph, sender);
571
			/*
572 573 574 575 576 577
			 * RFC 1323:
			 * Both sides must send the Window Scale option
			 * to enable window scaling in either direction.
			 */
			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
578
				sender->td_scale =
579
				receiver->td_scale = 0;
580 581 582
			if (!tcph->ack)
				/* Simultaneous open */
				return true;
583 584 585 586 587
		} else {
			/*
			 * We are in the middle of a connection,
			 * its history is lost for us.
			 * Let's try to use the data from the packet.
588
			 */
589
			sender->td_end = end;
590 591
			swin = win << sender->td_scale;
			sender->td_maxwin = (swin == 0 ? 1 : swin);
592
			sender->td_maxend = end + sender->td_maxwin;
593 594 595 596 597 598 599
			/*
			 * We haven't seen traffic in the other direction yet
			 * but we have to tweak window tracking to pass III
			 * and IV until that happens.
			 */
			if (receiver->td_maxwin == 0)
				receiver->td_end = receiver->td_maxend = sack;
600 601 602 603 604 605 606 607
		}
	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
		     && dir == IP_CT_DIR_ORIGINAL)
		   || (state->state == TCP_CONNTRACK_SYN_RECV
		     && dir == IP_CT_DIR_REPLY))
		   && after(end, sender->td_end)) {
		/*
		 * RFC 793: "if a TCP is reinitialized ... then it need
608
		 * not wait at all; it must only be sure to use sequence
609 610 611 612 613 614 615 616 617 618 619 620 621 622
		 * numbers larger than those recently used."
		 */
		sender->td_end =
		sender->td_maxend = end;
		sender->td_maxwin = (win == 0 ? 1 : win);

		tcp_options(skb, dataoff, tcph, sender);
	}

	if (!(tcph->ack)) {
		/*
		 * If there is no ACK, just pretend it was set and OK.
		 */
		ack = sack = receiver->td_end;
623 624
	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
		    (TCP_FLAG_ACK|TCP_FLAG_RST))
625 626 627 628 629 630 631 632
		   && (ack == 0)) {
		/*
		 * Broken TCP stacks, that set ACK in RST packets as well
		 * with zero ack value.
		 */
		ack = sack = receiver->td_end;
	}

633
	if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
634
		/*
635
		 * RST sent answering SYN.
636 637 638
		 */
		seq = end = sender->td_end;

639
	pr_debug("tcp_in_window: ");
640
	nf_ct_dump_tuple(tuple);
641 642
	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
643 644 645 646 647 648 649 650 651 652 653
	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);

	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
		 before(seq, sender->td_maxend + 1),
		 after(end, sender->td_end - receiver->td_maxwin - 1),
		 before(sack, receiver->td_end + 1),
654
		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
655

656 657 658
	if (before(seq, sender->td_maxend + 1) &&
	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
	    before(sack, receiver->td_end + 1) &&
659
	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
660
		/*
661 662 663 664 665 666 667 668 669 670 671
		 * Take into account window scaling (RFC 1323).
		 */
		if (!tcph->syn)
			win <<= sender->td_scale;

		/*
		 * Update sender data.
		 */
		swin = win + (sack - ack);
		if (sender->td_maxwin < swin)
			sender->td_maxwin = swin;
672
		if (after(end, sender->td_end)) {
673
			sender->td_end = end;
674 675
			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
		}
676 677 678 679 680 681 682 683
		if (tcph->ack) {
			if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
				sender->td_maxack = ack;
				sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
			} else if (after(ack, sender->td_maxack))
				sender->td_maxack = ack;
		}

684 685 686
		/*
		 * Update receiver data.
		 */
687
		if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
688 689 690 691 692 693
			receiver->td_maxwin += end - sender->td_maxend;
		if (after(sack + win, receiver->td_maxend - 1)) {
			receiver->td_maxend = sack + win;
			if (win == 0)
				receiver->td_maxend++;
		}
694 695
		if (ack == receiver->td_end)
			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
696

697
		/*
698 699 700 701 702 703
		 * Check retransmissions.
		 */
		if (index == TCP_ACK_SET) {
			if (state->last_dir == dir
			    && state->last_seq == seq
			    && state->last_ack == ack
704 705
			    && state->last_end == end
			    && state->last_win == win)
706 707 708 709 710 711
				state->retrans++;
			else {
				state->last_dir = dir;
				state->last_seq = seq;
				state->last_ack = ack;
				state->last_end = end;
712
				state->last_win = win;
713 714 715
				state->retrans = 0;
			}
		}
716
		res = true;
717
	} else {
718
		res = false;
719
		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
720
		    tn->tcp_be_liberal)
721
			res = true;
722
		if (!res && LOG_INVALID(net, IPPROTO_TCP))
723
			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
724 725 726 727
			"nf_ct_tcp: %s ",
			before(seq, sender->td_maxend + 1) ?
			after(end, sender->td_end - receiver->td_maxwin - 1) ?
			before(sack, receiver->td_end + 1) ?
728
			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
729 730 731 732
			: "ACK is under the lower bound (possible overly delayed ACK)"
			: "ACK is over the upper bound (ACKed data not seen yet)"
			: "SEQ is under the lower bound (already ACKed data retransmitted)"
			: "SEQ is over the upper bound (over the window of the receiver)");
733 734
	}

735
	pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
736 737 738
		 "receiver end=%u maxend=%u maxwin=%u\n",
		 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
739 740 741 742

	return res;
}

743
/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
C
Changli Gao 已提交
744 745
static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
				 TCPHDR_URG) + 1] =
746
{
C
Changli Gao 已提交
747 748 749 750 751 752 753 754 755
	[TCPHDR_SYN]				= 1,
	[TCPHDR_SYN|TCPHDR_URG]			= 1,
	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
	[TCPHDR_RST]				= 1,
	[TCPHDR_RST|TCPHDR_ACK]			= 1,
	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
	[TCPHDR_ACK]				= 1,
	[TCPHDR_ACK|TCPHDR_URG]			= 1,
756 757 758
};

/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
759
static int tcp_error(struct net *net, struct nf_conn *tmpl,
760
		     struct sk_buff *skb,
761 762
		     unsigned int dataoff,
		     enum ip_conntrack_info *ctinfo,
763
		     u_int8_t pf,
764
		     unsigned int hooknum)
765
{
766 767
	const struct tcphdr *th;
	struct tcphdr _tcph;
768 769 770 771 772 773
	unsigned int tcplen = skb->len - dataoff;
	u_int8_t tcpflags;

	/* Smaller that minimal TCP header? */
	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
	if (th == NULL) {
774
		if (LOG_INVALID(net, IPPROTO_TCP))
775
			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
776 777
				"nf_ct_tcp: short packet ");
		return -NF_ACCEPT;
778 779
	}

780 781
	/* Not whole TCP header or malformed packet */
	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
782
		if (LOG_INVALID(net, IPPROTO_TCP))
783
			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
784 785 786
				"nf_ct_tcp: truncated/malformed packet ");
		return -NF_ACCEPT;
	}
787

788 789
	/* Checksum invalid? Ignore.
	 * We skip checking packets on the outgoing path
790
	 * because the checksum is assumed to be correct.
791 792
	 */
	/* FIXME: Source route IP option packets --RR */
793
	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
794
	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
795
		if (LOG_INVALID(net, IPPROTO_TCP))
796
			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
797 798 799 800 801
				  "nf_ct_tcp: bad TCP checksum ");
		return -NF_ACCEPT;
	}

	/* Check TCP flags. */
C
Changli Gao 已提交
802
	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
803
	if (!tcp_valid_flags[tcpflags]) {
804
		if (LOG_INVALID(net, IPPROTO_TCP))
805
			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
806 807 808 809 810 811 812
				  "nf_ct_tcp: invalid TCP flag combination ");
		return -NF_ACCEPT;
	}

	return NF_ACCEPT;
}

813 814
static unsigned int *tcp_get_timeouts(struct net *net)
{
815
	return tcp_pernet(net)->timeouts;
816 817
}

818
/* Returns verdict for packet, or -1 for invalid. */
819
static int tcp_packet(struct nf_conn *ct,
820 821 822
		      const struct sk_buff *skb,
		      unsigned int dataoff,
		      enum ip_conntrack_info ctinfo,
823
		      u_int8_t pf,
824 825
		      unsigned int hooknum,
		      unsigned int *timeouts)
826
{
827
	struct net *net = nf_ct_net(ct);
828
	struct nf_tcp_net *tn = tcp_pernet(net);
829
	struct nf_conntrack_tuple *tuple;
830 831
	enum tcp_conntrack new_state, old_state;
	enum ip_conntrack_dir dir;
832 833
	const struct tcphdr *th;
	struct tcphdr _tcph;
834 835 836 837 838 839
	unsigned long timeout;
	unsigned int index;

	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
	BUG_ON(th == NULL);

840
	spin_lock_bh(&ct->lock);
841
	old_state = ct->proto.tcp.state;
842 843 844
	dir = CTINFO2DIR(ctinfo);
	index = get_conntrack_index(th);
	new_state = tcp_conntracks[dir][index][old_state];
845
	tuple = &ct->tuplehash[dir].tuple;
846 847

	switch (new_state) {
848 849 850
	case TCP_CONNTRACK_SYN_SENT:
		if (old_state < TCP_CONNTRACK_TIME_WAIT)
			break;
851 852 853 854 855 856 857 858 859 860 861 862 863 864 865
		/* RFC 1122: "When a connection is closed actively,
		 * it MUST linger in TIME-WAIT state for a time 2xMSL
		 * (Maximum Segment Lifetime). However, it MAY accept
		 * a new SYN from the remote TCP to reopen the connection
		 * directly from TIME-WAIT state, if..."
		 * We ignore the conditions because we are in the
		 * TIME-WAIT state anyway.
		 *
		 * Handle aborted connections: we and the server
		 * think there is an existing connection but the client
		 * aborts it and starts a new one.
		 */
		if (((ct->proto.tcp.seen[dir].flags
		      | ct->proto.tcp.seen[!dir].flags)
		     & IP_CT_TCP_FLAG_CLOSE_INIT)
866 867
		    || (ct->proto.tcp.last_dir == dir
		        && ct->proto.tcp.last_index == TCP_RST_SET)) {
868 869
			/* Attempt to reopen a closed/aborted connection.
			 * Delete this connection and look up again. */
870
			spin_unlock_bh(&ct->lock);
871

872 873 874 875
			/* Only repeat if we can actually remove the timer.
			 * Destruction may already be in progress in process
			 * context and we must give it a chance to terminate.
			 */
876
			if (nf_ct_kill(ct))
877
				return -NF_REPEAT;
878
			return NF_DROP;
879 880
		}
		/* Fall through */
881
	case TCP_CONNTRACK_IGNORE:
882
		/* Ignored packets:
883 884 885 886
		 *
		 * Our connection entry may be out of sync, so ignore
		 * packets which may signal the real connection between
		 * the client and the server.
887 888 889
		 *
		 * a) SYN in ORIGINAL
		 * b) SYN/ACK in REPLY
890
		 * c) ACK in reply direction after initial SYN in original.
891 892 893
		 *
		 * If the ignored packet is invalid, the receiver will send
		 * a RST we'll catch below.
894
		 */
895
		if (index == TCP_SYNACK_SET
896 897 898
		    && ct->proto.tcp.last_index == TCP_SYN_SET
		    && ct->proto.tcp.last_dir != dir
		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
899
			/* b) This SYN/ACK acknowledges a SYN that we earlier
900 901
			 * ignored as invalid. This means that the client and
			 * the server are both in sync, while the firewall is
902 903
			 * not. We get in sync from the previously annotated
			 * values.
904
			 */
905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920
			old_state = TCP_CONNTRACK_SYN_SENT;
			new_state = TCP_CONNTRACK_SYN_RECV;
			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
				ct->proto.tcp.last_end;
			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
				ct->proto.tcp.last_end;
			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
				ct->proto.tcp.last_win == 0 ?
					1 : ct->proto.tcp.last_win;
			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
				ct->proto.tcp.last_wscale;
			ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
				ct->proto.tcp.last_flags;
			memset(&ct->proto.tcp.seen[dir], 0,
			       sizeof(struct ip_ct_tcp_state));
			break;
921
		}
922 923 924 925
		ct->proto.tcp.last_index = index;
		ct->proto.tcp.last_dir = dir;
		ct->proto.tcp.last_seq = ntohl(th->seq);
		ct->proto.tcp.last_end =
926
		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949
		ct->proto.tcp.last_win = ntohs(th->window);

		/* a) This is a SYN in ORIGINAL. The client and the server
		 * may be in sync but we are not. In that case, we annotate
		 * the TCP options and let the packet go through. If it is a
		 * valid SYN packet, the server will reply with a SYN/ACK, and
		 * then we'll get in sync. Otherwise, the server ignores it. */
		if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
			struct ip_ct_tcp_state seen = {};

			ct->proto.tcp.last_flags =
			ct->proto.tcp.last_wscale = 0;
			tcp_options(skb, dataoff, th, &seen);
			if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
				ct->proto.tcp.last_flags |=
					IP_CT_TCP_FLAG_WINDOW_SCALE;
				ct->proto.tcp.last_wscale = seen.td_scale;
			}
			if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
				ct->proto.tcp.last_flags |=
					IP_CT_TCP_FLAG_SACK_PERM;
			}
		}
950
		spin_unlock_bh(&ct->lock);
951
		if (LOG_INVALID(net, IPPROTO_TCP))
952
			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
953 954
				  "nf_ct_tcp: invalid packet ignored in "
				  "state %s ", tcp_conntrack_names[old_state]);
955 956 957
		return NF_ACCEPT;
	case TCP_CONNTRACK_MAX:
		/* Invalid packet */
958 959
		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
			 dir, get_conntrack_index(th), old_state);
960
		spin_unlock_bh(&ct->lock);
961
		if (LOG_INVALID(net, IPPROTO_TCP))
962
			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
963 964 965
				  "nf_ct_tcp: invalid state ");
		return -NF_ACCEPT;
	case TCP_CONNTRACK_CLOSE:
966 967 968 969
		if (index == TCP_RST_SET
		    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
		    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
			/* Invalid RST  */
970
			spin_unlock_bh(&ct->lock);
971
			if (LOG_INVALID(net, IPPROTO_TCP))
972 973
				nf_log_packet(net, pf, 0, skb, NULL, NULL,
					      NULL, "nf_ct_tcp: invalid RST ");
974 975
			return -NF_ACCEPT;
		}
976
		if (index == TCP_RST_SET
977 978 979 980 981
		    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
			 && ct->proto.tcp.last_index == TCP_SYN_SET)
			|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
			    && ct->proto.tcp.last_index == TCP_ACK_SET))
		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
A
Adrian Bunk 已提交
982
			/* RST sent to invalid SYN or ACK we had let through
983 984 985 986 987 988
			 * at a) and c) above:
			 *
			 * a) SYN was in window then
			 * c) we hold a half-open connection.
			 *
			 * Delete our connection entry.
989
			 * We skip window checking, because packet might ACK
990
			 * segments we ignored. */
991 992
			goto in_window;
		}
A
Adrian Bunk 已提交
993
		/* Just fall through */
994 995 996 997 998
	default:
		/* Keep compilers happy. */
		break;
	}

999
	if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1000
			   skb, dataoff, th, pf)) {
1001
		spin_unlock_bh(&ct->lock);
1002 1003 1004 1005
		return -NF_ACCEPT;
	}
     in_window:
	/* From now on we have got in-window packets */
1006 1007
	ct->proto.tcp.last_index = index;
	ct->proto.tcp.last_dir = dir;
1008

1009
	pr_debug("tcp_conntracks: ");
1010
	nf_ct_dump_tuple(tuple);
1011 1012 1013 1014
	pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
		 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
		 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
		 old_state, new_state);
1015

1016
	ct->proto.tcp.state = new_state;
1017
	if (old_state != new_state
1018
	    && new_state == TCP_CONNTRACK_FIN_WAIT)
1019
		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1020

1021
	if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1022 1023
	    timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
		timeout = timeouts[TCP_CONNTRACK_RETRANS];
1024 1025
	else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1026 1027
		 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
		timeout = timeouts[TCP_CONNTRACK_UNACK];
1028
	else
1029
		timeout = timeouts[new_state];
1030
	spin_unlock_bh(&ct->lock);
1031 1032

	if (new_state != old_state)
1033
		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1034

1035
	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1036 1037 1038 1039 1040
		/* If only reply is a RST, we can consider ourselves not to
		   have an established connection: this is a fairly common
		   problem case, so we can delete the conntrack
		   immediately.  --RR */
		if (th->rst) {
1041
			nf_ct_kill_acct(ct, ctinfo, skb);
1042 1043
			return NF_ACCEPT;
		}
1044
	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1045 1046 1047
		   && (old_state == TCP_CONNTRACK_SYN_RECV
		       || old_state == TCP_CONNTRACK_ESTABLISHED)
		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
1048 1049
		/* Set ASSURED if we see see valid ack in ESTABLISHED
		   after SYN_RECV or a valid answer for a picked up
1050
		   connection. */
1051
		set_bit(IPS_ASSURED_BIT, &ct->status);
1052
		nf_conntrack_event_cache(IPCT_ASSURED, ct);
1053
	}
1054
	nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1055 1056 1057

	return NF_ACCEPT;
}
1058

1059
/* Called when a new connection for this protocol found. */
1060
static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1061
		    unsigned int dataoff, unsigned int *timeouts)
1062 1063
{
	enum tcp_conntrack new_state;
1064 1065
	const struct tcphdr *th;
	struct tcphdr _tcph;
1066 1067
	struct net *net = nf_ct_net(ct);
	struct nf_tcp_net *tn = tcp_pernet(net);
1068 1069
	const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
	const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1070 1071 1072 1073 1074

	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
	BUG_ON(th == NULL);

	/* Don't need lock here: this conntrack not in circulation yet */
1075
	new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1076 1077 1078

	/* Invalid: delete conntrack */
	if (new_state >= TCP_CONNTRACK_MAX) {
1079
		pr_debug("nf_ct_tcp: invalid new deleting.\n");
1080
		return false;
1081 1082 1083
	}

	if (new_state == TCP_CONNTRACK_SYN_SENT) {
1084
		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1085
		/* SYN packet */
1086
		ct->proto.tcp.seen[0].td_end =
1087 1088
			segment_seq_plus_len(ntohl(th->seq), skb->len,
					     dataoff, th);
1089 1090 1091 1092 1093 1094 1095
		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
		if (ct->proto.tcp.seen[0].td_maxwin == 0)
			ct->proto.tcp.seen[0].td_maxwin = 1;
		ct->proto.tcp.seen[0].td_maxend =
			ct->proto.tcp.seen[0].td_end;

		tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1096
	} else if (tn->tcp_loose == 0) {
1097
		/* Don't try to pick up connections. */
1098
		return false;
1099
	} else {
1100
		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1101 1102 1103 1104 1105
		/*
		 * We are in the middle of a connection,
		 * its history is lost for us.
		 * Let's try to use the data from the packet.
		 */
1106
		ct->proto.tcp.seen[0].td_end =
1107 1108
			segment_seq_plus_len(ntohl(th->seq), skb->len,
					     dataoff, th);
1109 1110 1111 1112 1113 1114
		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
		if (ct->proto.tcp.seen[0].td_maxwin == 0)
			ct->proto.tcp.seen[0].td_maxwin = 1;
		ct->proto.tcp.seen[0].td_maxend =
			ct->proto.tcp.seen[0].td_end +
			ct->proto.tcp.seen[0].td_maxwin;
1115

1116 1117
		/* We assume SACK and liberal window checking to handle
		 * window scaling */
1118 1119 1120
		ct->proto.tcp.seen[0].flags =
		ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
					      IP_CT_TCP_FLAG_BE_LIBERAL;
1121
	}
1122

1123
	/* tcp_packet will set them */
1124
	ct->proto.tcp.last_index = TCP_NONE_SET;
1125

1126 1127 1128 1129 1130 1131
	pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);
1132
	return true;
1133
}
1134

#if IS_ENABLED(CONFIG_NF_CT_NETLINK)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>

1140
static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1141
			 struct nf_conn *ct)
1142
{
1143
	struct nlattr *nest_parms;
1144
	struct nf_ct_tcp_flags tmp = {};
1145

1146
	spin_lock_bh(&ct->lock);
1147 1148 1149 1150
	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
	if (!nest_parms)
		goto nla_put_failure;

1151 1152 1153 1154 1155 1156
	if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
		       ct->proto.tcp.seen[0].td_scale) ||
	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
		       ct->proto.tcp.seen[1].td_scale))
		goto nla_put_failure;
1157 1158

	tmp.flags = ct->proto.tcp.seen[0].flags;
1159 1160 1161
	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
		    sizeof(struct nf_ct_tcp_flags), &tmp))
		goto nla_put_failure;
1162 1163

	tmp.flags = ct->proto.tcp.seen[1].flags;
1164 1165 1166
	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
		    sizeof(struct nf_ct_tcp_flags), &tmp))
		goto nla_put_failure;
1167
	spin_unlock_bh(&ct->lock);
1168

1169
	nla_nest_end(skb, nest_parms);
1170 1171 1172

	return 0;

1173
nla_put_failure:
1174
	spin_unlock_bh(&ct->lock);
1175 1176 1177
	return -1;
}

1178 1179 1180 1181 1182 1183
static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
	[CTA_PROTOINFO_TCP_STATE]	    = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len =  sizeof(struct nf_ct_tcp_flags) },
1184 1185
};

1186
static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1187
{
1188
	struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1189
	struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1190
	int err;
1191 1192 1193

	/* updates could not contain anything about the private
	 * protocol info, in that case skip the parsing */
1194
	if (!pattr)
1195 1196
		return 0;

1197
	err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1198 1199
	if (err < 0)
		return err;
1200

1201 1202
	if (tb[CTA_PROTOINFO_TCP_STATE] &&
	    nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1203 1204
		return -EINVAL;

1205
	spin_lock_bh(&ct->lock);
1206 1207
	if (tb[CTA_PROTOINFO_TCP_STATE])
		ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1208

1209
	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1210
		struct nf_ct_tcp_flags *attr =
1211
			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1212 1213 1214 1215
		ct->proto.tcp.seen[0].flags &= ~attr->mask;
		ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
	}

1216
	if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1217
		struct nf_ct_tcp_flags *attr =
1218
			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1219 1220 1221 1222
		ct->proto.tcp.seen[1].flags &= ~attr->mask;
		ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
	}

1223 1224
	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1225 1226
	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1227 1228 1229 1230
		ct->proto.tcp.seen[0].td_scale =
			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
		ct->proto.tcp.seen[1].td_scale =
			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1231
	}
1232
	spin_unlock_bh(&ct->lock);
1233 1234 1235

	return 0;
}
1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246

static int tcp_nlattr_size(void)
{
	/* Empty nest header for CTA_PROTOINFO_TCP ... */
	const int nest_hdr = nla_total_size(0);
	/* ... plus every attribute described by tcp_nla_policy. */
	const int nested = nla_policy_len(tcp_nla_policy,
					  CTA_PROTOINFO_TCP_MAX + 1);

	return nest_hdr + nested;
}

static int tcp_nlattr_tuple_size(void)
{
	/* Size of the attributes described by nf_ct_port_nla_policy. */
	const int tuple_len = nla_policy_len(nf_ct_port_nla_policy,
					     CTA_PROTO_MAX + 1);

	return tuple_len;
}
#endif

#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_cttimeout.h>

1254 1255
static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
				     struct net *net, void *data)
1256 1257
{
	unsigned int *timeouts = data;
1258
	struct nf_tcp_net *tn = tcp_pernet(net);
1259 1260 1261 1262
	int i;

	/* set default TCP timeouts. */
	for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1263
		timeouts[i] = tn->timeouts[i];
1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316

	if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
		timeouts[TCP_CONNTRACK_SYN_SENT] =
			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
	}
	if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
		timeouts[TCP_CONNTRACK_SYN_RECV] =
			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
	}
	if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
		timeouts[TCP_CONNTRACK_ESTABLISHED] =
			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
	}
	if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
		timeouts[TCP_CONNTRACK_FIN_WAIT] =
			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
	}
	if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
		timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
	}
	if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
		timeouts[TCP_CONNTRACK_LAST_ACK] =
			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
	}
	if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
		timeouts[TCP_CONNTRACK_TIME_WAIT] =
			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
	}
	if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
		timeouts[TCP_CONNTRACK_CLOSE] =
			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
	}
	if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
		timeouts[TCP_CONNTRACK_SYN_SENT2] =
			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
	}
	if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
		timeouts[TCP_CONNTRACK_RETRANS] =
			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
	}
	if (tb[CTA_TIMEOUT_TCP_UNACK]) {
		timeouts[TCP_CONNTRACK_UNACK] =
			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
	}
	return 0;
}

static int
tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
{
	const unsigned int *timeouts = data;

1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339
	if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
			htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
			 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
	    nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
			 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
	    nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
			 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
			 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
	    nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
			 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
	    nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
			 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
			 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
			 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
	    nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
			 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
	    nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
			 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
		goto nla_put_failure;
1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355
	return 0;

nla_put_failure:
	return -ENOSPC;
}

static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
	[CTA_TIMEOUT_TCP_SYN_SENT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_SYN_RECV]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_ESTABLISHED]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_FIN_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_CLOSE_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_LAST_ACK]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_TIME_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_CLOSE]		= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_SYN_SENT2]	= { .type = NLA_U32 },
1356 1357
	[CTA_TIMEOUT_TCP_RETRANS]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_UNACK]		= { .type = NLA_U32 },
1358 1359 1360
};
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */

#ifdef CONFIG_SYSCTL
static struct ctl_table tcp_sysctl_table[] = {
	{
		.procname	= "nf_conntrack_tcp_timeout_syn_sent",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1367
		.proc_handler	= proc_dointvec_jiffies,
1368 1369 1370 1371 1372
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_syn_recv",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1373
		.proc_handler	= proc_dointvec_jiffies,
1374 1375 1376 1377 1378
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_established",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1379
		.proc_handler	= proc_dointvec_jiffies,
1380 1381 1382 1383 1384
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_fin_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1385
		.proc_handler	= proc_dointvec_jiffies,
1386 1387 1388 1389 1390
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_close_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1391
		.proc_handler	= proc_dointvec_jiffies,
1392 1393 1394 1395 1396
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_last_ack",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1397
		.proc_handler	= proc_dointvec_jiffies,
1398 1399 1400 1401 1402
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_time_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1403
		.proc_handler	= proc_dointvec_jiffies,
1404 1405 1406 1407 1408
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_close",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1409
		.proc_handler	= proc_dointvec_jiffies,
1410 1411 1412 1413 1414
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_max_retrans",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1415
		.proc_handler	= proc_dointvec_jiffies,
1416
	},
1417 1418 1419 1420
	{
		.procname	= "nf_conntrack_tcp_timeout_unacknowledged",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1421
		.proc_handler	= proc_dointvec_jiffies,
1422
	},
1423 1424 1425 1426
	{
		.procname	= "nf_conntrack_tcp_loose",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1427
		.proc_handler	= proc_dointvec,
1428 1429 1430 1431 1432
	},
	{
		.procname       = "nf_conntrack_tcp_be_liberal",
		.maxlen         = sizeof(unsigned int),
		.mode           = 0644,
A
Alexey Dobriyan 已提交
1433
		.proc_handler   = proc_dointvec,
1434 1435 1436 1437 1438
	},
	{
		.procname	= "nf_conntrack_tcp_max_retrans",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1439
		.proc_handler	= proc_dointvec,
1440
	},
1441
	{ }
1442
};
1443 1444 1445 1446 1447 1448 1449

#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
static struct ctl_table tcp_compat_sysctl_table[] = {
	{
		.procname	= "ip_conntrack_tcp_timeout_syn_sent",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1450
		.proc_handler	= proc_dointvec_jiffies,
1451
	},
1452 1453 1454 1455 1456 1457
	{
		.procname	= "ip_conntrack_tcp_timeout_syn_sent2",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
1458 1459 1460 1461
	{
		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1462
		.proc_handler	= proc_dointvec_jiffies,
1463 1464 1465 1466 1467
	},
	{
		.procname	= "ip_conntrack_tcp_timeout_established",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1468
		.proc_handler	= proc_dointvec_jiffies,
1469 1470 1471 1472 1473
	},
	{
		.procname	= "ip_conntrack_tcp_timeout_fin_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1474
		.proc_handler	= proc_dointvec_jiffies,
1475 1476 1477 1478 1479
	},
	{
		.procname	= "ip_conntrack_tcp_timeout_close_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1480
		.proc_handler	= proc_dointvec_jiffies,
1481 1482 1483 1484 1485
	},
	{
		.procname	= "ip_conntrack_tcp_timeout_last_ack",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1486
		.proc_handler	= proc_dointvec_jiffies,
1487 1488 1489 1490 1491
	},
	{
		.procname	= "ip_conntrack_tcp_timeout_time_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1492
		.proc_handler	= proc_dointvec_jiffies,
1493 1494 1495 1496 1497
	},
	{
		.procname	= "ip_conntrack_tcp_timeout_close",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1498
		.proc_handler	= proc_dointvec_jiffies,
1499 1500 1501 1502 1503
	},
	{
		.procname	= "ip_conntrack_tcp_timeout_max_retrans",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1504
		.proc_handler	= proc_dointvec_jiffies,
1505 1506 1507 1508 1509
	},
	{
		.procname	= "ip_conntrack_tcp_loose",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1510
		.proc_handler	= proc_dointvec,
1511 1512 1513 1514 1515
	},
	{
		.procname	= "ip_conntrack_tcp_be_liberal",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1516
		.proc_handler	= proc_dointvec,
1517 1518 1519 1520 1521
	},
	{
		.procname	= "ip_conntrack_tcp_max_retrans",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
A
Alexey Dobriyan 已提交
1522
		.proc_handler	= proc_dointvec,
1523
	},
1524
	{ }
1525 1526
};
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */

/* Give @pn its own copy of tcp_sysctl_table with .data pointing into @tn.
 *
 * Does nothing if @pn already has a table.  The indices below follow the
 * entry order of tcp_sysctl_table.  Returns 0 on success (and always when
 * CONFIG_SYSCTL is off), -ENOMEM if the copy cannot be allocated.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	if (pn->ctl_table)
		return 0;

	pn->ctl_table = kmemdup(tcp_sysctl_table,
				sizeof(tcp_sysctl_table),
				GFP_KERNEL);
	if (!pn->ctl_table)
		return -ENOMEM;

	pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
	pn->ctl_table[10].data = &tn->tcp_loose;
	pn->ctl_table[11].data = &tn->tcp_be_liberal;
	pn->ctl_table[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}

/* Give @pn its own copy of tcp_compat_sysctl_table with .data pointing
 * into @tn.
 *
 * The indices below follow the entry order of tcp_compat_sysctl_table.
 * Returns 0 on success (and always when CONFIG_SYSCTL or
 * CONFIG_NF_CONNTRACK_PROC_COMPAT is off), -ENOMEM if the copy cannot be
 * allocated.
 */
static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
					   struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
				       sizeof(tcp_compat_sysctl_table),
				       GFP_KERNEL);
	if (!pn->ctl_compat_table)
		return -ENOMEM;

	pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
	pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	pn->ctl_compat_table[10].data = &tn->tcp_loose;
	pn->ctl_compat_table[11].data = &tn->tcp_be_liberal;
	pn->ctl_compat_table[12].data = &tn->tcp_max_retrans;
#endif
#endif
	return 0;
}

1588
static int tcp_init_net(struct net *net, u_int16_t proto)
1589
{
1590
	int ret;
1591
	struct nf_tcp_net *tn = tcp_pernet(net);
1592 1593 1594 1595
	struct nf_proto_net *pn = &tn->pn;

	if (!pn->users) {
		int i;
1596 1597 1598 1599 1600 1601 1602 1603 1604

		for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
			tn->timeouts[i] = tcp_timeouts[i];

		tn->tcp_loose = nf_ct_tcp_loose;
		tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
		tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
	}

1605 1606 1607 1608
	if (proto == AF_INET) {
		ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
		if (ret < 0)
			return ret;
1609

1610 1611 1612 1613 1614
		ret = tcp_kmemdup_sysctl_table(pn, tn);
		if (ret < 0)
			nf_ct_kfree_compat_sysctl_table(pn);
	} else
		ret = tcp_kmemdup_sysctl_table(pn, tn);
1615 1616 1617 1618

	return ret;
}

1619 1620 1621 1622 1623
static struct nf_proto_net *tcp_get_net_proto(struct net *net)
{
	return &net->ct.nf_ct_proto.tcp.pn;
}

1624
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1625 1626
{
	.l3proto		= PF_INET,
1627
	.l4proto 		= IPPROTO_TCP,
1628 1629 1630 1631 1632 1633
	.name 			= "tcp",
	.pkt_to_tuple 		= tcp_pkt_to_tuple,
	.invert_tuple 		= tcp_invert_tuple,
	.print_tuple 		= tcp_print_tuple,
	.print_conntrack 	= tcp_print_conntrack,
	.packet 		= tcp_packet,
1634
	.get_timeouts		= tcp_get_timeouts,
1635
	.new 			= tcp_new,
1636
	.error			= tcp_error,
I
Igor Maravić 已提交
1637
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1638
	.to_nlattr		= tcp_to_nlattr,
1639
	.nlattr_size		= tcp_nlattr_size,
1640 1641 1642
	.from_nlattr		= nlattr_to_tcp,
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
1643
	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
1644
	.nla_policy		= nf_ct_port_nla_policy,
1645
#endif
1646 1647 1648 1649 1650 1651 1652 1653 1654 1655
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
	.ctnl_timeout		= {
		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
		.obj_size	= sizeof(unsigned int) *
					TCP_CONNTRACK_TIMEOUT_MAX,
		.nla_policy	= tcp_timeout_nla_policy,
	},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1656
	.init_net		= tcp_init_net,
1657
	.get_net_proto		= tcp_get_net_proto,
1658
};
1659
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1660

1661
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1662 1663
{
	.l3proto		= PF_INET6,
1664
	.l4proto 		= IPPROTO_TCP,
1665 1666 1667 1668 1669 1670
	.name 			= "tcp",
	.pkt_to_tuple 		= tcp_pkt_to_tuple,
	.invert_tuple 		= tcp_invert_tuple,
	.print_tuple 		= tcp_print_tuple,
	.print_conntrack 	= tcp_print_conntrack,
	.packet 		= tcp_packet,
1671
	.get_timeouts		= tcp_get_timeouts,
1672
	.new 			= tcp_new,
1673
	.error			= tcp_error,
I
Igor Maravić 已提交
1674
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1675
	.to_nlattr		= tcp_to_nlattr,
1676
	.nlattr_size		= tcp_nlattr_size,
1677 1678 1679
	.from_nlattr		= nlattr_to_tcp,
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
1680
	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
1681
	.nla_policy		= nf_ct_port_nla_policy,
1682
#endif
1683 1684 1685 1686 1687 1688 1689 1690 1691 1692
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
	.ctnl_timeout		= {
		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
		.obj_size	= sizeof(unsigned int) *
					TCP_CONNTRACK_TIMEOUT_MAX,
		.nla_policy	= tcp_timeout_nla_policy,
	},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1693
	.init_net		= tcp_init_net,
1694
	.get_net_proto		= tcp_get_net_proto,
1695
};
1696
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);