提交 2100c8d2 编写于 作者: Y Yuchung Cheng 提交者: David S. Miller

net-tcp: Fast Open base

This patch impelements the common code for both the client and server.

1. TCP Fast Open option processing. Since Fast Open does not have an
   option number assigned by IANA yet, it shares the experiment option
   code 254 by implementing draft-ietf-tcpm-experimental-options
   with a 16 bits magic number 0xF989. This enables global experiments
   without clashing the scarce(2) experimental options available for TCP.

   When the draft status becomes standard (maybe), the client should
   switch to the new option number assigned while the server supports
   both numbers for transistion.

2. The new sysctl tcp_fastopen

3. A place holder init function
Signed-off-by: NYuchung Cheng <ycheng@google.com>
Acked-by: NEric Dumazet <edumazet@google.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 4cce66cd
...@@ -243,6 +243,16 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) ...@@ -243,6 +243,16 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
return (tcp_hdr(skb)->doff - 5) * 4; return (tcp_hdr(skb)->doff - 5) * 4;
} }
/* TCP Fast Open */
#define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */
#define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */
/* TCP Fast Open Cookie as stored in memory */
struct tcp_fastopen_cookie {
s8 len;
u8 val[TCP_FASTOPEN_COOKIE_MAX];
};
/* This defines a selective acknowledgement block. */ /* This defines a selective acknowledgement block. */
struct tcp_sack_block_wire { struct tcp_sack_block_wire {
__be32 start_seq; __be32 start_seq;
......
...@@ -170,6 +170,11 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); ...@@ -170,6 +170,11 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
#define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ #define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */
#define TCPOPT_EXP 254 /* Experimental */
/* Magic number to be after the option value for sharing TCP
* experimental options. See draft-ietf-tcpm-experimental-options-00.txt
*/
#define TCPOPT_FASTOPEN_MAGIC 0xF989
/* /*
* TCP option lengths * TCP option lengths
...@@ -180,6 +185,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); ...@@ -180,6 +185,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOLEN_SACK_PERM 2 #define TCPOLEN_SACK_PERM 2
#define TCPOLEN_TIMESTAMP 10 #define TCPOLEN_TIMESTAMP 10
#define TCPOLEN_MD5SIG 18 #define TCPOLEN_MD5SIG 18
#define TCPOLEN_EXP_FASTOPEN_BASE 4
#define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */ #define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */
#define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */ #define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */
#define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN) #define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
...@@ -222,6 +228,7 @@ extern int sysctl_tcp_retries1; ...@@ -222,6 +228,7 @@ extern int sysctl_tcp_retries1;
extern int sysctl_tcp_retries2; extern int sysctl_tcp_retries2;
extern int sysctl_tcp_orphan_retries; extern int sysctl_tcp_orphan_retries;
extern int sysctl_tcp_syncookies; extern int sysctl_tcp_syncookies;
extern int sysctl_tcp_fastopen;
extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg; extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_rfc1337;
...@@ -418,7 +425,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ...@@ -418,7 +425,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int nonblock, int flags, int *addr_len); size_t len, int nonblock, int flags, int *addr_len);
extern void tcp_parse_options(const struct sk_buff *skb, extern void tcp_parse_options(const struct sk_buff *skb,
struct tcp_options_received *opt_rx, const u8 **hvpp, struct tcp_options_received *opt_rx, const u8 **hvpp,
int estab); int estab, struct tcp_fastopen_cookie *foc);
extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
/* /*
......
...@@ -7,7 +7,7 @@ obj-y := route.o inetpeer.o protocol.o \ ...@@ -7,7 +7,7 @@ obj-y := route.o inetpeer.o protocol.o \
ip_output.o ip_sockglue.o inet_hashtables.o \ ip_output.o ip_sockglue.o inet_hashtables.o \
inet_timewait_sock.o inet_connection_sock.o \ inet_timewait_sock.o inet_connection_sock.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
tcp_minisocks.o tcp_cong.o tcp_metrics.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
datagram.o raw.o udp.o udplite.o \ datagram.o raw.o udp.o udplite.o \
arp.o icmp.o devinet.o af_inet.o igmp.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o \ fib_frontend.o fib_semantics.o fib_trie.o \
......
...@@ -293,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ...@@ -293,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
/* check for timestamp cookie support */ /* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt)); memset(&tcp_opt, 0, sizeof(tcp_opt));
tcp_parse_options(skb, &tcp_opt, &hash_location, 0); tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
goto out; goto out;
......
...@@ -366,6 +366,13 @@ static struct ctl_table ipv4_table[] = { ...@@ -366,6 +366,13 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
#endif #endif
{
.procname = "tcp_fastopen",
.data = &sysctl_tcp_fastopen,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ {
.procname = "tcp_tw_recycle", .procname = "tcp_tw_recycle",
.data = &tcp_death_row.sysctl_tw_recycle, .data = &tcp_death_row.sysctl_tw_recycle,
......
#include <linux/init.h>
#include <linux/kernel.h>
int sysctl_tcp_fastopen;
static int __init tcp_fastopen_init(void)
{
return 0;
}
late_initcall(tcp_fastopen_init);
...@@ -3732,7 +3732,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ...@@ -3732,7 +3732,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* the fast version below fails. * the fast version below fails.
*/ */
void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
const u8 **hvpp, int estab) const u8 **hvpp, int estab,
struct tcp_fastopen_cookie *foc)
{ {
const unsigned char *ptr; const unsigned char *ptr;
const struct tcphdr *th = tcp_hdr(skb); const struct tcphdr *th = tcp_hdr(skb);
...@@ -3839,8 +3840,25 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o ...@@ -3839,8 +3840,25 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
break; break;
} }
break; break;
}
case TCPOPT_EXP:
/* Fast Open option shares code 254 using a
* 16 bits magic number. It's valid only in
* SYN or SYN-ACK with an even size.
*/
if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
foc == NULL || !th->syn || (opsize & 1))
break;
foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
foc->len <= TCP_FASTOPEN_COOKIE_MAX)
memcpy(foc->val, ptr + 2, foc->len);
else if (foc->len != 0)
foc->len = -1;
break;
}
ptr += opsize-2; ptr += opsize-2;
length -= opsize; length -= opsize;
} }
...@@ -3882,7 +3900,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, ...@@ -3882,7 +3900,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
if (tcp_parse_aligned_timestamp(tp, th)) if (tcp_parse_aligned_timestamp(tp, th))
return true; return true;
} }
tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
return true; return true;
} }
...@@ -5637,7 +5655,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -5637,7 +5655,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcp_cookie_values *cvp = tp->cookie_values; struct tcp_cookie_values *cvp = tp->cookie_values;
int saved_clamp = tp->rx_opt.mss_clamp; int saved_clamp = tp->rx_opt.mss_clamp;
tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0); tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, NULL);
if (th->ack) { if (th->ack) {
/* rfc793: /* rfc793:
......
...@@ -1307,7 +1307,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ...@@ -1307,7 +1307,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt); tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
tmp_opt.user_mss = tp->rx_opt.user_mss; tmp_opt.user_mss = tp->rx_opt.user_mss;
tcp_parse_options(skb, &tmp_opt, &hash_location, 0); tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.cookie_plus > 0 && if (tmp_opt.cookie_plus > 0 &&
tmp_opt.saw_tstamp && tmp_opt.saw_tstamp &&
......
...@@ -97,7 +97,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, ...@@ -97,7 +97,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
tmp_opt.saw_tstamp = 0; tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
tcp_parse_options(skb, &tmp_opt, &hash_location, 0); tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.saw_tstamp) { if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = tcptw->tw_ts_recent; tmp_opt.ts_recent = tcptw->tw_ts_recent;
...@@ -534,7 +534,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, ...@@ -534,7 +534,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
tmp_opt.saw_tstamp = 0; tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) { if (th->doff > (sizeof(struct tcphdr)>>2)) {
tcp_parse_options(skb, &tmp_opt, &hash_location, 0); tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.saw_tstamp) { if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = req->ts_recent; tmp_opt.ts_recent = req->ts_recent;
......
...@@ -385,15 +385,17 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) ...@@ -385,15 +385,17 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_MD5 (1 << 2) #define OPTION_MD5 (1 << 2)
#define OPTION_WSCALE (1 << 3) #define OPTION_WSCALE (1 << 3)
#define OPTION_COOKIE_EXTENSION (1 << 4) #define OPTION_COOKIE_EXTENSION (1 << 4)
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
struct tcp_out_options { struct tcp_out_options {
u8 options; /* bit field of OPTION_* */ u16 options; /* bit field of OPTION_* */
u16 mss; /* 0 to disable */
u8 ws; /* window scale, 0 to disable */ u8 ws; /* window scale, 0 to disable */
u8 num_sack_blocks; /* number of SACK blocks to include */ u8 num_sack_blocks; /* number of SACK blocks to include */
u8 hash_size; /* bytes in hash_location */ u8 hash_size; /* bytes in hash_location */
u16 mss; /* 0 to disable */
__u32 tsval, tsecr; /* need to include OPTION_TS */
__u8 *hash_location; /* temporary pointer, overloaded */ __u8 *hash_location; /* temporary pointer, overloaded */
__u32 tsval, tsecr; /* need to include OPTION_TS */
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
}; };
/* The sysctl int routines are generic, so check consistency here. /* The sysctl int routines are generic, so check consistency here.
...@@ -442,7 +444,7 @@ static u8 tcp_cookie_size_check(u8 desired) ...@@ -442,7 +444,7 @@ static u8 tcp_cookie_size_check(u8 desired)
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
struct tcp_out_options *opts) struct tcp_out_options *opts)
{ {
u8 options = opts->options; /* mungable copy */ u16 options = opts->options; /* mungable copy */
/* Having both authentication and cookies for security is redundant, /* Having both authentication and cookies for security is redundant,
* and there's certainly not enough room. Instead, the cookie-less * and there's certainly not enough room. Instead, the cookie-less
...@@ -564,6 +566,21 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, ...@@ -564,6 +566,21 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
tp->rx_opt.dsack = 0; tp->rx_opt.dsack = 0;
} }
if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
*ptr++ = htonl((TCPOPT_EXP << 24) |
((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
TCPOPT_FASTOPEN_MAGIC);
memcpy(ptr, foc->val, foc->len);
if ((foc->len & 3) == 2) {
u8 *align = ((u8 *)ptr) + foc->len;
align[0] = align[1] = TCPOPT_NOP;
}
ptr += (foc->len + 3) >> 2;
}
} }
/* Compute TCP options for SYN packets. This is not the final /* Compute TCP options for SYN packets. This is not the final
......
...@@ -177,7 +177,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ...@@ -177,7 +177,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */ /* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt)); memset(&tcp_opt, 0, sizeof(tcp_opt));
tcp_parse_options(skb, &tcp_opt, &hash_location, 0); tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
goto out; goto out;
......
...@@ -1033,7 +1033,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ...@@ -1033,7 +1033,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt); tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss; tmp_opt.user_mss = tp->rx_opt.user_mss;
tcp_parse_options(skb, &tmp_opt, &hash_location, 0); tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.cookie_plus > 0 && if (tmp_opt.cookie_plus > 0 &&
tmp_opt.saw_tstamp && tmp_opt.saw_tstamp &&
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册