提交 66b13d99 编写于 作者: E Eric Dumazet 提交者: David S. Miller

ipv4: tcp: fix TOS value in ACK messages sent from TIME_WAIT

There is a long-standing bug in the Linux TCP stack concerning ACK messages
sent on behalf of TIME_WAIT sockets.

In the IP header of the ACK message, we choose to reflect the TOS field of
the incoming message, and this might break some setups.

Example of things that were broken :
  - Routing using TOS as a selector
  - Firewalls
  - Traffic classification / shaping

We now remember the inet tos field in the timewait structure and use it for
both ACK generation and route lookup.

Notes :
 - We still reflect incoming TOS in RST messages.
 - We could extend MuraliRaja Muniraju patch to report TOS value in
netlink messages for TIME_WAIT sockets.
 - A patch is needed for IPv6
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 318cf7aa
...@@ -126,7 +126,8 @@ struct inet_timewait_sock { ...@@ -126,7 +126,8 @@ struct inet_timewait_sock {
/* And these are ours. */ /* And these are ours. */
unsigned int tw_ipv6only : 1, unsigned int tw_ipv6only : 1,
tw_transparent : 1, tw_transparent : 1,
tw_pad : 14, /* 14 bits hole */ tw_pad : 6, /* 6 bits hole */
tw_tos : 8,
tw_ipv6_offset : 16; tw_ipv6_offset : 16;
kmemcheck_bitfield_end(flags); kmemcheck_bitfield_end(flags);
unsigned long tw_ttd; unsigned long tw_ttd;
......
...@@ -165,6 +165,7 @@ struct ip_reply_arg { ...@@ -165,6 +165,7 @@ struct ip_reply_arg {
int csumoffset; /* u16 offset of csum in iov[0].iov_base */ int csumoffset; /* u16 offset of csum in iov[0].iov_base */
/* -1 if not needed */ /* -1 if not needed */
int bound_dev_if; int bound_dev_if;
u8 tos;
}; };
#define IP_REPLY_ARG_NOSRCCHECK 1 #define IP_REPLY_ARG_NOSRCCHECK 1
...@@ -175,7 +176,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) ...@@ -175,7 +176,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
} }
void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
struct ip_reply_arg *arg, unsigned int len); const struct ip_reply_arg *arg, unsigned int len);
struct ipv4_config { struct ipv4_config {
int log_martians; int log_martians;
......
...@@ -183,6 +183,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat ...@@ -183,6 +183,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
tw->tw_daddr = inet->inet_daddr; tw->tw_daddr = inet->inet_daddr;
tw->tw_rcv_saddr = inet->inet_rcv_saddr; tw->tw_rcv_saddr = inet->inet_rcv_saddr;
tw->tw_bound_dev_if = sk->sk_bound_dev_if; tw->tw_bound_dev_if = sk->sk_bound_dev_if;
tw->tw_tos = inet->tos;
tw->tw_num = inet->inet_num; tw->tw_num = inet->inet_num;
tw->tw_state = TCP_TIME_WAIT; tw->tw_state = TCP_TIME_WAIT;
tw->tw_substate = state; tw->tw_substate = state;
......
...@@ -1466,7 +1466,7 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, ...@@ -1466,7 +1466,7 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
* structure to pass arguments. * structure to pass arguments.
*/ */
void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
struct ip_reply_arg *arg, unsigned int len) const struct ip_reply_arg *arg, unsigned int len)
{ {
struct inet_sock *inet = inet_sk(sk); struct inet_sock *inet = inet_sk(sk);
struct ip_options_data replyopts; struct ip_options_data replyopts;
...@@ -1489,7 +1489,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, ...@@ -1489,7 +1489,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
} }
flowi4_init_output(&fl4, arg->bound_dev_if, 0, flowi4_init_output(&fl4, arg->bound_dev_if, 0,
RT_TOS(ip_hdr(skb)->tos), RT_TOS(arg->tos),
RT_SCOPE_UNIVERSE, sk->sk_protocol, RT_SCOPE_UNIVERSE, sk->sk_protocol,
ip_reply_arg_flowi_flags(arg), ip_reply_arg_flowi_flags(arg),
daddr, rt->rt_spec_dst, daddr, rt->rt_spec_dst,
...@@ -1506,7 +1506,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, ...@@ -1506,7 +1506,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
with locally disabled BH and that sk cannot be already spinlocked. with locally disabled BH and that sk cannot be already spinlocked.
*/ */
bh_lock_sock(sk); bh_lock_sock(sk);
inet->tos = ip_hdr(skb)->tos; inet->tos = arg->tos;
sk->sk_priority = skb->priority; sk->sk_priority = skb->priority;
sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if; sk->sk_bound_dev_if = arg->bound_dev_if;
......
...@@ -652,6 +652,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) ...@@ -652,6 +652,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
net = dev_net(skb_dst(skb)->dev); net = dev_net(skb_dst(skb)->dev);
arg.tos = ip_hdr(skb)->tos;
ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
&arg, arg.iov[0].iov_len); &arg, arg.iov[0].iov_len);
...@@ -666,7 +667,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) ...@@ -666,7 +667,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 ts, int oif, u32 win, u32 ts, int oif,
struct tcp_md5sig_key *key, struct tcp_md5sig_key *key,
int reply_flags) int reply_flags, u8 tos)
{ {
const struct tcphdr *th = tcp_hdr(skb); const struct tcphdr *th = tcp_hdr(skb);
struct { struct {
...@@ -726,7 +727,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ...@@ -726,7 +727,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.csumoffset = offsetof(struct tcphdr, check) / 2;
if (oif) if (oif)
arg.bound_dev_if = oif; arg.bound_dev_if = oif;
arg.tos = tos;
ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
&arg, arg.iov[0].iov_len); &arg, arg.iov[0].iov_len);
...@@ -743,7 +744,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) ...@@ -743,7 +744,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_ts_recent, tcptw->tw_ts_recent,
tw->tw_bound_dev_if, tw->tw_bound_dev_if,
tcp_twsk_md5_key(tcptw), tcp_twsk_md5_key(tcptw),
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
tw->tw_tos
); );
inet_twsk_put(tw); inet_twsk_put(tw);
...@@ -757,7 +759,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, ...@@ -757,7 +759,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
req->ts_recent, req->ts_recent,
0, 0,
tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0); inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
} }
/* /*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册