/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the TCP protocol.
 *
 * Version:	@(#)tcp.h	1.0.2	04/28/93
 *
 * Author:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#ifndef _LINUX_TCP_H
#define _LINUX_TCP_H

#include <linux/types.h>
#include <asm/byteorder.h>
22
#include <linux/socket.h>
L
Linus Torvalds 已提交
23 24

struct tcphdr {
A
Al Viro 已提交
25 26 27 28
	__be16	source;
	__be16	dest;
	__be32	seq;
	__be32	ack_seq;
L
Linus Torvalds 已提交
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u16	res1:4,
		doff:4,
		fin:1,
		syn:1,
		rst:1,
		psh:1,
		ack:1,
		urg:1,
		ece:1,
		cwr:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u16	doff:4,
		res1:4,
		cwr:1,
		ece:1,
		urg:1,
		ack:1,
		psh:1,
		rst:1,
		syn:1,
		fin:1;
#else
#error	"Adjust your <asm/byteorder.h> defines"
#endif	
A
Al Viro 已提交
54
	__be16	window;
55
	__sum16	check;
A
Al Viro 已提交
56
	__be16	urg_ptr;
L
Linus Torvalds 已提交
57 58 59 60 61 62 63 64 65
};

/*
 *	The union cast uses a gcc extension to avoid aliasing problems
 *  (union is compatible to any of its members)
 *  This means this part of the code is -fstrict-aliasing safe now.
 */
union tcp_word_hdr { 
	struct tcphdr hdr;
A
Al Viro 已提交
66
	__be32 		  words[5];
L
Linus Torvalds 已提交
67 68 69 70 71
}; 

/*
 * Access words[3] of the header that tp points to, by viewing it through
 * union tcp_word_hdr.  tp is evaluated exactly once.
 */
#define tcp_flag_word(tp)	(((union tcp_word_hdr *)(tp))->words[3])

enum { 
72 73 74 75 76 77 78 79 80 81
	TCP_FLAG_CWR = __cpu_to_be32(0x00800000),
	TCP_FLAG_ECE = __cpu_to_be32(0x00400000),
	TCP_FLAG_URG = __cpu_to_be32(0x00200000),
	TCP_FLAG_ACK = __cpu_to_be32(0x00100000),
	TCP_FLAG_PSH = __cpu_to_be32(0x00080000),
	TCP_FLAG_RST = __cpu_to_be32(0x00040000),
	TCP_FLAG_SYN = __cpu_to_be32(0x00020000),
	TCP_FLAG_FIN = __cpu_to_be32(0x00010000),
	TCP_RESERVED_BITS = __cpu_to_be32(0x0F000000),
	TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000)
L
Linus Torvalds 已提交
82 83
}; 

84 85 86 87 88 89
/*
 * TCP general constants
 *
 * Both values are unsigned (U suffix).  TCP_MSS_DEFAULT is also used in
 * this header as the size of tcp_cookie_transactions.tcpct_value[].
 */
#define TCP_MSS_DEFAULT		 536U	/* IPv4 (RFC1122, RFC2581) */
#define TCP_MSS_DESIRED		1220U	/* IPv6 (tunneled), EDNS0 (RFC3226) */

L
Linus Torvalds 已提交
90 91 92 93 94 95 96 97 98 99 100 101 102
/* TCP socket options */
#define TCP_NODELAY		1	/* Turn off Nagle's algorithm. */
#define TCP_MAXSEG		2	/* Limit MSS */
#define TCP_CORK		3	/* Never send partially complete segments */
#define TCP_KEEPIDLE		4	/* Start keepalives after this period */
#define TCP_KEEPINTVL		5	/* Interval between keepalives */
#define TCP_KEEPCNT		6	/* Number of keepalives before death */
#define TCP_SYNCNT		7	/* Number of SYN retransmits */
#define TCP_LINGER2		8	/* Life time of orphaned FIN-WAIT-2 state */
#define TCP_DEFER_ACCEPT	9	/* Wake up listener only when data arrive */
#define TCP_WINDOW_CLAMP	10	/* Bound advertised window */
#define TCP_INFO		11	/* Information about this connection. */
#define TCP_QUICKACK		12	/* Block/reenable quick acks */
#define TCP_CONGESTION		13	/* Congestion control algorithm */
#define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
#define TCP_COOKIE_TRANSACTIONS	15	/* TCP Cookie Transactions */
#define TCP_THIN_LINEAR_TIMEOUTS 16	/* Use linear timeouts for thin streams */
#define TCP_THIN_DUPACK		17	/* Fast retrans. after 1 dupack */
#define TCP_USER_TIMEOUT	18	/* How long for loss retry before timeout */
L
Linus Torvalds 已提交
109

110
/* for TCP_INFO socket option: distinct single-bit flag values */
#define TCPI_OPT_TIMESTAMPS	1
#define TCPI_OPT_SACK		2
#define TCPI_OPT_WSCALE		4
#define TCPI_OPT_ECN		8 /* ECN was negotiated at TCP session init */
#define TCPI_OPT_ECN_SEEN	16 /* we received at least one packet with ECT */
L
Linus Torvalds 已提交
116

E
Eric Dumazet 已提交
117
/*
 * Congestion-avoidance states.  Each TCP_CA_x value is paired with a
 * TCPF_CA_x macro giving the corresponding single-bit mask (1 << TCP_CA_x).
 */
enum tcp_ca_state {
	TCP_CA_Open = 0,
#define TCPF_CA_Open	(1<<TCP_CA_Open)
	TCP_CA_Disorder = 1,
#define TCPF_CA_Disorder (1<<TCP_CA_Disorder)
	TCP_CA_CWR = 2,
#define TCPF_CA_CWR	(1<<TCP_CA_CWR)
	TCP_CA_Recovery = 3,
#define TCPF_CA_Recovery (1<<TCP_CA_Recovery)
	TCP_CA_Loss = 4
#define TCPF_CA_Loss	(1<<TCP_CA_Loss)
};

E
Eric Dumazet 已提交
130
struct tcp_info {
L
Linus Torvalds 已提交
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
	__u8	tcpi_state;
	__u8	tcpi_ca_state;
	__u8	tcpi_retransmits;
	__u8	tcpi_probes;
	__u8	tcpi_backoff;
	__u8	tcpi_options;
	__u8	tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;

	__u32	tcpi_rto;
	__u32	tcpi_ato;
	__u32	tcpi_snd_mss;
	__u32	tcpi_rcv_mss;

	__u32	tcpi_unacked;
	__u32	tcpi_sacked;
	__u32	tcpi_lost;
	__u32	tcpi_retrans;
	__u32	tcpi_fackets;

	/* Times. */
	__u32	tcpi_last_data_sent;
	__u32	tcpi_last_ack_sent;     /* Not remembered, sorry. */
	__u32	tcpi_last_data_recv;
	__u32	tcpi_last_ack_recv;

	/* Metrics. */
	__u32	tcpi_pmtu;
	__u32	tcpi_rcv_ssthresh;
	__u32	tcpi_rtt;
	__u32	tcpi_rttvar;
	__u32	tcpi_snd_ssthresh;
	__u32	tcpi_snd_cwnd;
	__u32	tcpi_advmss;
	__u32	tcpi_reordering;

	__u32	tcpi_rcv_rtt;
	__u32	tcpi_rcv_space;

	__u32	tcpi_total_retrans;
};

172 173 174 175 176 177 178 179 180 181 182
/* for TCP_MD5SIG socket option */
#define TCP_MD5SIG_MAXKEYLEN	80	/* capacity of tcp_md5sig.tcpm_key[] in bytes */

/*
 * Data structure for the TCP_MD5SIG socket option: an address, a key length
 * and a fixed-size key buffer.  The two pad fields are documented as zero.
 */
struct tcp_md5sig {
	struct __kernel_sockaddr_storage tcpm_addr;	/* address associated */
	__u16	__tcpm_pad1;				/* zero */
	__u16	tcpm_keylen;				/* key length */
	__u32	__tcpm_pad2;				/* zero */
	__u8	tcpm_key[TCP_MD5SIG_MAXKEYLEN];		/* key (binary) */
};

183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
/* for TCP_COOKIE_TRANSACTIONS (TCPCT) socket option */
#define TCP_COOKIE_MIN		 8		/*  64 bits, in bytes */
#define TCP_COOKIE_MAX		16		/* 128 bits, in bytes */
/* Twice TCP_COOKIE_MAX, i.e. room for two maximum-size cookies. */
#define TCP_COOKIE_PAIR_SIZE	(2*TCP_COOKIE_MAX)

/* Flags for both getsockopt and setsockopt */
#define TCP_COOKIE_IN_ALWAYS	(1 << 0)	/* Discard SYN without cookie */
#define TCP_COOKIE_OUT_NEVER	(1 << 1)	/* Prohibit outgoing cookies,
						 * supersedes everything. */

/* Flags for getsockopt */
#define TCP_S_DATA_IN		(1 << 2)	/* Was data received? */
#define TCP_S_DATA_OUT		(1 << 3)	/* Was data sent? */

/*
 * TCP_COOKIE_TRANSACTIONS data
 *
 * Fixed 8-byte header followed by a value buffer of TCP_MSS_DEFAULT bytes.
 */
struct tcp_cookie_transactions {
	__u16	tcpct_flags;			/* TCP_COOKIE_* / TCP_S_DATA_* flags */
	__u8	__tcpct_pad1;			/* zero */
	__u8	tcpct_cookie_desired;		/* bytes */
	__u16	tcpct_s_data_desired;		/* bytes of variable data */
	__u16	tcpct_used;			/* bytes in value */
	__u8	tcpct_value[TCP_MSS_DEFAULT];
};

L
Linus Torvalds 已提交
207 208 209
#ifdef __KERNEL__

#include <linux/skbuff.h>
210
#include <linux/dmaengine.h>
L
Linus Torvalds 已提交
211
#include <net/sock.h>
212
#include <net/inet_connection_sock.h>
213
#include <net/inet_timewait_sock.h>
L
Linus Torvalds 已提交
214

215 216
/*
 * Return the skb's transport header, as reported by skb_transport_header(),
 * typed as a TCP header.  The const qualifier on skb does not carry over to
 * the returned pointer.
 */
static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
{
	return (struct tcphdr *)skb_transport_header(skb);
}

220 221
static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
{
222
	return tcp_hdr(skb)->doff * 4;
223 224 225 226
}

static inline unsigned int tcp_optlen(const struct sk_buff *skb)
{
227
	return (tcp_hdr(skb)->doff - 5) * 4;
228 229
}

L
Linus Torvalds 已提交
230
/* This defines a selective acknowledgement block. */
231 232 233 234 235
/*
 * Same two fields as struct tcp_sack_block, but declared with the
 * big-endian annotated type __be32 instead of u32.
 */
struct tcp_sack_block_wire {
	__be32	start_seq;
	__be32	end_seq;
};

L
Linus Torvalds 已提交
236
struct tcp_sack_block {
237 238
	u32	start_seq;
	u32	end_seq;
L
Linus Torvalds 已提交
239 240
};

241 242 243 244 245
/*
 * These are used to set the sack_ok field in struct tcp_options_received
 * (a 4-bit bit-field, so these three single-bit flags fit).
 */
#define TCP_SACK_SEEN     (1 << 0)   /* 1 = peer is SACK capable */
#define TCP_FACK_ENABLED  (1 << 1)   /* 1 = FACK is enabled locally */
#define TCP_DSACK_SEEN    (1 << 2)   /* 1 = DSACK was received from peer */

L
Linus Torvalds 已提交
246 247 248
struct tcp_options_received {
/*	PAWS/RTTM data	*/
	long	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
249 250 251 252
	u32	ts_recent;	/* Time stamp to echo next		*/
	u32	rcv_tsval;	/* Time stamp value             	*/
	u32	rcv_tsecr;	/* Time stamp echo reply        	*/
	u16 	saw_tstamp : 1,	/* Saw TIMESTAMP on last packet		*/
L
Linus Torvalds 已提交
253 254 255 256 257 258
		tstamp_ok : 1,	/* TIMESTAMP seen on SYN packet		*/
		dsack : 1,	/* D-SACK is scheduled			*/
		wscale_ok : 1,	/* Wscale seen on SYN packet		*/
		sack_ok : 4,	/* SACK seen on SYN packet		*/
		snd_wscale : 4,	/* Window scaling received from sender	*/
		rcv_wscale : 4;	/* Window scaling to send to receiver	*/
259 260 261
	u8	cookie_plus:6,	/* bytes in authenticator/cookie option	*/
		cookie_out_never:1,
		cookie_in_always:1;
262
	u8	num_sacks;	/* Number of SACK blocks		*/
263
	u16	user_mss;	/* mss requested by user in ioctl	*/
264
	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
L
Linus Torvalds 已提交
265 266
};

267 268
/*
 * Zero the tstamp_ok, sack_ok, wscale_ok, snd_wscale and cookie_plus
 * fields of *rx_opt.  All other fields are left untouched.
 */
static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
{
	rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
	rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
	rx_opt->cookie_plus = 0;
}

274
/* This is the max number of SACKS that we'll generate and process. It's safe
 * to increase this, although since:
 *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
 * only four SACK blocks will fit in a standard TCP header */
#define TCP_NUM_SACKS 4

280 281 282
/*
 * Forward declarations: this header only uses pointers to these types,
 * so their full definitions are not needed here.
 */
struct tcp_cookie_values;
struct tcp_request_sock_ops;

283
struct tcp_request_sock {
284 285 286
	struct inet_request_sock 	req;
#ifdef CONFIG_TCP_MD5SIG
	/* Only used by TCP MD5 Signature so far. */
287
	const struct tcp_request_sock_ops *af_specific;
288
#endif
289 290
	u32				rcv_isn;
	u32				snt_isn;
291
	u32				snt_synack; /* synack sent time */
292 293
};

294
/*
 * Reinterpret a request_sock pointer as a tcp_request_sock pointer.
 * This is a plain cast: no checking is done, and the const qualifier is
 * dropped.  NOTE(review): validity presumably relies on the request_sock
 * sitting at offset 0 of struct tcp_request_sock -- confirm.
 */
static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
{
	return (struct tcp_request_sock *)req;
}

L
Linus Torvalds 已提交
299
struct tcp_sock {
300 301
	/* inet_connection_sock has to be the first member of tcp_sock */
	struct inet_connection_sock	inet_conn;
302
	u16	tcp_header_len;	/* Bytes of tcp header to send		*/
303
	u16	xmit_size_goal_segs; /* Goal for segmenting output packets */
L
Linus Torvalds 已提交
304 305 306 307 308

/*
 *	Header prediction flags
 *	0x5?10 << 16 + snd_wnd in net byte order
 */
309
	__be32	pred_flags;
L
Linus Torvalds 已提交
310 311 312 313 314 315

/*
 *	RFC793 variables by their proper names. This means you can
 *	read the code and the spec side by side (and laugh ...)
 *	See RFC793 and RFC1122. The RFC writes these in capitals.
 */
316
 	u32	rcv_nxt;	/* What we want to receive next 	*/
317 318
	u32	copied_seq;	/* Head of yet unread data		*/
	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
319
 	u32	snd_nxt;	/* Next sequence we send		*/
L
Linus Torvalds 已提交
320

321 322 323 324
 	u32	snd_una;	/* First byte we want an ack for	*/
 	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
L
Linus Torvalds 已提交
325 326 327 328 329 330 331 332

	/* Data for direct copy to user */
	struct {
		struct sk_buff_head	prequeue;
		struct task_struct	*task;
		struct iovec		*iov;
		int			memory;
		int			len;
333 334 335 336 337 338 339
#ifdef CONFIG_NET_DMA
		/* members for async copy */
		struct dma_chan		*dma_chan;
		int			wakeup;
		struct dma_pinned_list	*pinned_list;
		dma_cookie_t		dma_cookie;
#endif
L
Linus Torvalds 已提交
340 341
	} ucopy;

342 343 344 345
	u32	snd_wl1;	/* Sequence for window update		*/
	u32	snd_wnd;	/* The window we expect to receive	*/
	u32	max_window;	/* Maximal window ever seen from peer	*/
	u32	mss_cache;	/* Cached effective mss, not including SACKS */
L
Linus Torvalds 已提交
346

347 348
	u32	window_clamp;	/* Maximal window to advertise		*/
	u32	rcv_ssthresh;	/* Current window clamp			*/
L
Linus Torvalds 已提交
349

350
	u32	frto_highmark;	/* snd_nxt when RTO occurred */
351
	u16	advmss;		/* Advertised MSS			*/
352
	u8	frto_counter;	/* Number of new acks after RTO */
A
Andreas Petlund 已提交
353 354
	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
		thin_lto    : 1,/* Use linear timeouts for thin streams */
A
Andreas Petlund 已提交
355 356
		thin_dupack : 1,/* Fast retransmit on first dupack      */
		unused      : 2;
L
Linus Torvalds 已提交
357 358

/* RTT measurement */
359 360 361 362 363 364 365 366
	u32	srtt;		/* smoothed round trip time << 3	*/
	u32	mdev;		/* medium deviation			*/
	u32	mdev_max;	/* maximal mdev for the last rtt period	*/
	u32	rttvar;		/* smoothed mdev_max			*/
	u32	rtt_seq;	/* sequence number to update rttvar	*/

	u32	packets_out;	/* Packets which are "in flight"	*/
	u32	retrans_out;	/* Retransmitted packets out		*/
367 368 369

	u16	urg_data;	/* Saved octet of OOB data and control flags */
	u8	ecn_flags;	/* ECN status bits.			*/
I
Ilpo Järvinen 已提交
370 371 372 373
	u8	reordering;	/* Packet reordering metric.		*/
	u32	snd_up;		/* Urgent pointer		*/

	u8	keepalive_probes; /* num of allowed keep alive probes	*/
L
Linus Torvalds 已提交
374 375 376 377 378 379 380 381
/*
 *      Options received (usually on last packet, some only on SYN packets).
 */
	struct tcp_options_received rx_opt;

/*
 *	Slow start and congestion control (see also Nagle, and Karn & Partridge)
 */
382 383
 	u32	snd_ssthresh;	/* Slow start size threshold		*/
 	u32	snd_cwnd;	/* Sending congestion window		*/
I
Ilpo Järvinen 已提交
384
	u32	snd_cwnd_cnt;	/* Linear increase counter		*/
385
	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
386 387
	u32	snd_cwnd_used;
	u32	snd_cwnd_stamp;
388 389 390 391
	u32	prior_cwnd;	/* Congestion window at start of Recovery. */
	u32	prr_delivered;	/* Number of newly delivered packets to
				 * receiver in Recovery. */
	u32	prr_out;	/* Total number of pkts sent during Recovery. */
L
Linus Torvalds 已提交
392

393 394 395
 	u32	rcv_wnd;	/* Current receiver window		*/
	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
	u32	pushed_seq;	/* Last pushed seq, required to talk to windows */
396 397 398 399 400
	u32	lost_out;	/* Lost packets			*/
	u32	sacked_out;	/* SACK'd packets			*/
	u32	fackets_out;	/* FACK'd packets			*/
	u32	tso_deferred;
	u32	bytes_acked;	/* Appropriate Byte Counting - RFC3465 */
L
Linus Torvalds 已提交
401

402 403 404 405 406 407 408 409
	/* from STCP, retrans queue hinting */
	struct sk_buff* lost_skb_hint;
	struct sk_buff *scoreboard_skb_hint;
	struct sk_buff *retransmit_skb_hint;

	struct sk_buff_head	out_of_order_queue; /* Out of order segments go here */

	/* SACKs data, these 2 need to be together (see tcp_build_and_update_options) */
L
Linus Torvalds 已提交
410 411 412
	struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
	struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/

413
	struct tcp_sack_block recv_sack_cache[4];
414

415 416
	struct sk_buff *highest_sack;   /* skb just after the highest
					 * skb with SACKed bit set
417 418 419
					 * (validity guaranteed only if
					 * sacked_out > 0)
					 */
420

421
	int     lost_cnt_hint;
422
	u32     retransmit_high;	/* L-bits may be on up to this seqno */
423

424 425
	u32	lost_retrans_low;	/* Sent seq after any rxmit (lowest) */

I
Ilpo Järvinen 已提交
426
	u32	prior_ssthresh; /* ssthresh saved at recovery start	*/
427
	u32	high_seq;	/* snd_nxt at onset of congestion	*/
L
Linus Torvalds 已提交
428

429
	u32	retrans_stamp;	/* Timestamp of the last retransmit,
L
Linus Torvalds 已提交
430 431
				 * also used in SYN-SENT to remember stamp of
				 * the first SYN. */
432
	u32	undo_marker;	/* tracking retrans started here. */
L
Linus Torvalds 已提交
433
	int	undo_retrans;	/* number of undoable retransmissions. */
434 435
	u32	total_retrans;	/* Total retransmits for entire connection */

436
	u32	urg_seq;	/* Seq of received urgent pointer */
L
Linus Torvalds 已提交
437 438 439
	unsigned int		keepalive_time;	  /* time before keep alive takes place */
	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */

440
	int			linger2;
L
Linus Torvalds 已提交
441 442 443

/* Receiver side RTT estimation */
	struct {
444 445 446
		u32	rtt;
		u32	seq;
		u32	time;
L
Linus Torvalds 已提交
447 448 449 450 451
	} rcv_rtt_est;

/* Receiver queue space */
	struct {
		int	space;
452 453
		u32	seq;
		u32	time;
L
Linus Torvalds 已提交
454
	} rcvq_space;
455 456 457

/* TCP-specific MTU probe information. */
	struct {
458 459
		u32		  probe_seq_start;
		u32		  probe_seq_end;
460
	} mtu_probe;
461 462 463

#ifdef CONFIG_TCP_MD5SIG
/* TCP AF-Specific parts; only used by MD5 Signature support so far */
464
	const struct tcp_sock_af_ops	*af_specific;
465

466
/* TCP MD5 Signature Option information */
467
	struct tcp_md5sig_info	__rcu *md5sig_info;
468
#endif
469 470 471 472 473 474

	/* When the cookie options are generated and exchanged, then this
	 * object holds a reference to them (cookie_values->kref).  Also
	 * contains related tcp_cookie_transactions fields.
	 */
	struct tcp_cookie_values  *cookie_values;
L
Linus Torvalds 已提交
475 476 477 478 479 480 481
};

/*
 * Reinterpret a struct sock pointer as a struct tcp_sock pointer.
 * This is a plain cast: no checking is done, and the const qualifier is
 * dropped.  NOTE(review): validity presumably relies on the struct sock
 * sitting at offset 0 of struct tcp_sock (via inet_conn) -- confirm.
 */
static inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	const void *base = sk;

	return (struct tcp_sock *)base;
}

482 483
struct tcp_timewait_sock {
	struct inet_timewait_sock tw_sk;
484 485 486 487
	u32			  tw_rcv_nxt;
	u32			  tw_snd_nxt;
	u32			  tw_rcv_wnd;
	u32			  tw_ts_recent;
488
	long			  tw_ts_recent_stamp;
489
#ifdef CONFIG_TCP_MD5SIG
E
Eric Dumazet 已提交
490
	struct tcp_md5sig_key	*tw_md5_key;
491
#endif
492 493 494 495
	/* Few sockets in timewait have cookies; in that case, then this
	 * object holds a reference to them (tw_cookie_values->kref).
	 */
	struct tcp_cookie_values  *tw_cookie_values;
496 497 498 499 500 501 502
};

/*
 * Reinterpret a struct sock pointer as a struct tcp_timewait_sock pointer.
 * This is a plain cast: no checking is done, and the const qualifier is
 * dropped.  NOTE(review): validity presumably relies on the struct sock
 * sitting at offset 0 of struct tcp_timewait_sock (via tw_sk) -- confirm.
 */
static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
{
	const void *base = sk;

	return (struct tcp_timewait_sock *)base;
}

503
#endif	/* __KERNEL__ */
L
Linus Torvalds 已提交
504 505

#endif	/* _LINUX_TCP_H */