Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
openeuler
Kernel
提交
79ffeeb9
K
Kernel
项目概览
openeuler
/
Kernel
1 年多 前同步成功
通知
8
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
K
Kernel
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
79ffeeb9
编写于
11月 10, 2005
作者:
L
Linus Torvalds
浏览文件
操作
浏览文件
下载
差异文件
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
上级
a5aac37f
6a438bbe
变更
19
展开全部
隐藏空白更改
内联
并排
Showing
19 changed file
with
413 addition
and
199 deletion
+413
-199
Documentation/networking/ip-sysctl.txt
Documentation/networking/ip-sysctl.txt
+5
-0
include/linux/sysctl.h
include/linux/sysctl.h
+1
-0
include/linux/tcp.h
include/linux/tcp.h
+16
-0
include/net/sock.h
include/net/sock.h
+6
-0
include/net/tcp.h
include/net/tcp.h
+65
-6
net/ipv4/sysctl_net_ipv4.c
net/ipv4/sysctl_net_ipv4.c
+8
-0
net/ipv4/tcp.c
net/ipv4/tcp.c
+2
-1
net/ipv4/tcp_bic.c
net/ipv4/tcp_bic.c
+5
-7
net/ipv4/tcp_cong.c
net/ipv4/tcp_cong.c
+24
-16
net/ipv4/tcp_highspeed.c
net/ipv4/tcp_highspeed.c
+5
-6
net/ipv4/tcp_htcp.c
net/ipv4/tcp_htcp.c
+6
-7
net/ipv4/tcp_hybla.c
net/ipv4/tcp_hybla.c
+3
-3
net/ipv4/tcp_input.c
net/ipv4/tcp_input.c
+194
-94
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_ipv4.c
+2
-2
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_minisocks.c
+4
-3
net/ipv4/tcp_output.c
net/ipv4/tcp_output.c
+47
-14
net/ipv4/tcp_scalable.c
net/ipv4/tcp_scalable.c
+7
-7
net/ipv4/tcp_timer.c
net/ipv4/tcp_timer.c
+2
-2
net/ipv4/tcp_vegas.c
net/ipv4/tcp_vegas.c
+11
-31
未找到文件。
Documentation/networking/ip-sysctl.txt
浏览文件 @
79ffeeb9
...
...
@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER
TCP variables:
tcp_abc - INTEGER
Controls Appropriate Byte Count defined in RFC3465. If set to
0 then does congestion avoid once per ack. 1 is conservative
value, and 2 is more aggressive.
tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 255. Default value
...
...
include/linux/sysctl.h
浏览文件 @
79ffeeb9
...
...
@@ -390,6 +390,7 @@ enum
NET_TCP_BIC_BETA
=
108
,
NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR
=
109
,
NET_TCP_CONG_CONTROL
=
110
,
NET_TCP_ABC
=
111
,
};
enum
{
...
...
include/linux/tcp.h
浏览文件 @
79ffeeb9
...
...
@@ -307,6 +307,21 @@ struct tcp_sock {
struct
tcp_sack_block
duplicate_sack
[
1
];
/* D-SACK block */
struct
tcp_sack_block
selective_acks
[
4
];
/* The SACKS themselves*/
struct
tcp_sack_block
recv_sack_cache
[
4
];
/* from STCP, retrans queue hinting */
struct
sk_buff
*
lost_skb_hint
;
struct
sk_buff
*
scoreboard_skb_hint
;
struct
sk_buff
*
retransmit_skb_hint
;
struct
sk_buff
*
forward_skb_hint
;
struct
sk_buff
*
fastpath_skb_hint
;
int
fastpath_cnt_hint
;
int
lost_cnt_hint
;
int
retransmit_cnt_hint
;
int
forward_cnt_hint
;
__u16
advmss
;
/* Advertised MSS */
__u16
prior_ssthresh
;
/* ssthresh saved at recovery start */
__u32
lost_out
;
/* Lost packets */
...
...
@@ -326,6 +341,7 @@ struct tcp_sock {
__u32
snd_up
;
/* Urgent pointer */
__u32
total_retrans
;
/* Total retransmits for entire connection */
__u32
bytes_acked
;
/* Appropriate Byte Counting - RFC3465 */
unsigned
int
keepalive_time
;
/* time before keep alive takes place */
unsigned
int
keepalive_intvl
;
/* time interval between keep alive probes */
...
...
include/net/sock.h
浏览文件 @
79ffeeb9
...
...
@@ -1247,6 +1247,12 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk)
(skb != (struct sk_buff *)&(sk)->sk_write_queue); \
skb = skb->next)
/* From STCP, for fast SACK processing.
 *
 * Loop header that advances skb through skb->next. The init clause is
 * deliberately empty: the caller must point skb at the buffer to start
 * from before using the macro. Iteration ends when skb reaches
 * (sk)->sk_send_head or comes back around to the sk_write_queue list head.
 */
#define sk_stream_for_retrans_queue_from(skb, sk) \
for (; (skb != (sk)->sk_send_head) && \
(skb != (struct sk_buff *)&(sk)->sk_write_queue); \
skb = skb->next)
/*
* Default write policy as shown to user space via poll/select/SIGIO
*/
...
...
include/net/tcp.h
浏览文件 @
79ffeeb9
...
...
@@ -89,10 +89,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
*/
#define TCP_SYN_RETRIES 5
/* number of times to retry active opening a
* connection: ~180sec is RFC min
u
mum */
* connection: ~180sec is RFC min
i
mum */
#define TCP_SYNACK_RETRIES 5
/* number of times to retry passive opening a
* connection: ~180sec is RFC min
u
mum */
* connection: ~180sec is RFC min
i
mum */
#define TCP_ORPHAN_RETRIES 7
/* number of times to retry on an orphaned
...
...
@@ -180,7 +180,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
/* Flags in tp->nonagle */
#define TCP_NAGLE_OFF 1
/* Nagle's algo is disabled */
#define TCP_NAGLE_CORK 2
/* Socket is corked */
#define TCP_NAGLE_PUSH 4
/* Cork is overriden for already queued data */
#define TCP_NAGLE_PUSH 4
/* Cork is overrid
d
en for already queued data */
extern
struct
inet_timewait_death_row
tcp_death_row
;
...
...
@@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency;
extern
int
sysctl_tcp_nometrics_save
;
extern
int
sysctl_tcp_moderate_rcvbuf
;
extern
int
sysctl_tcp_tso_win_divisor
;
extern
int
sysctl_tcp_abc
;
extern
atomic_t
tcp_memory_allocated
;
extern
atomic_t
tcp_sockets_allocated
;
...
...
@@ -551,13 +552,13 @@ extern u32 __tcp_select_window(struct sock *sk);
/* TCP timestamps are only 32-bits, this causes a slight
* complication on 64-bit systems since we store a snapshot
* of jiffies in the buffer control blocks below. We decidely
* of jiffies in the buffer control blocks below. We decide
d
ly
* only use of the low 32-bits of jiffies and hide the ugly
* casts with the following macro.
*/
#define tcp_time_stamp ((__u32)(jiffies))
/* This is what the send packet queu
e
ing engine uses to pass
/* This is what the send packet queuing engine uses to pass
* TCP per-packet control information to the transmission
* code. We also store the host-order sequence numbers in
* here too. This is 36 bytes on 32-bit architectures,
...
...
@@ -597,7 +598,7 @@ struct tcp_skb_cb {
#define TCPCB_EVER_RETRANS 0x80
/* Ever retransmitted frame */
#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
#define TCPCB_URG 0x20
/* Urgent pointer adv
e
nced here */
#define TCPCB_URG 0x20
/* Urgent pointer adv
a
nced here */
#define TCPCB_AT_TAIL (TCPCB_URG)
...
...
@@ -765,6 +766,33 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
(
tp
->
snd_cwnd
>>
2
)));
}
/*
 * Slow start: grow the congestion window in response to an ACK.
 *
 * When sysctl_tcp_abc is 0, each call raises snd_cwnd by one, bounded by
 * snd_cwnd_clamp.  When Appropriate Byte Counting (RFC3465) is enabled,
 * nothing happens until at least one full mss_cache worth of bytes has
 * been acknowledged; with sysctl_tcp_abc > 1 the window may grow by two
 * in a single call once two full segments have been acknowledged.
 *
 * bytes_acked is reset to zero whenever the window is considered for
 * growth (i.e. on every path except the early return).
 */
static inline void tcp_slow_start(struct tcp_sock *tp)
{
	if (sysctl_tcp_abc) {
		/* RFC3465: Slow Start
		 * TCP sender SHOULD increase cwnd by the number of
		 * previously unacknowledged bytes ACKed by each incoming
		 * acknowledgment, provided the increase is not more than L
		 */
		if (tp->bytes_acked < tp->mss_cache)
			return;

		/* We MAY increase by 2 if discovered delayed ack.
		 * The bound is inclusive: an ACK that covers exactly two
		 * full-sized segments must qualify, otherwise the extra
		 * increment is skipped for exactly the ACKs it is meant for.
		 */
		if (sysctl_tcp_abc > 1 &&
		    tp->bytes_acked >= 2 * tp->mss_cache) {
			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
				tp->snd_cwnd++;
		}
	}
	tp->bytes_acked = 0;

	/* Regular per-ACK increment, never past the clamp. */
	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
		tp->snd_cwnd++;
}
static
inline
void
tcp_sync_left_out
(
struct
tcp_sock
*
tp
)
{
if
(
tp
->
rx_opt
.
sack_ok
&&
...
...
@@ -794,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk)
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
tp
->
prior_ssthresh
=
0
;
tp
->
bytes_acked
=
0
;
if
(
inet_csk
(
sk
)
->
icsk_ca_state
<
TCP_CA_CWR
)
{
__tcp_enter_cwr
(
sk
);
tcp_set_ca_state
(
sk
,
TCP_CA_CWR
);
...
...
@@ -810,6 +839,27 @@ static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
return
3
;
}
/* RFC2861: decide whether the sender is held back by the congestion
 * window rather than by the application.
 * This is the inverse of cwnd check in tcp_tso_should_defer
 *
 * Returns 1 when in_flight has reached snd_cwnd.  Without NETIF_F_TSO in
 * sk_route_caps any unused window means "not limited" (0).  With TSO the
 * remaining window is compared against snd_cwnd scaled by
 * sysctl_tcp_tso_win_divisor, or against tcp_max_burst() when that
 * divisor is zero.
 */
static inline int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u32 spare;

	/* Window completely used: clearly cwnd limited. */
	if (tp->snd_cwnd <= in_flight)
		return 1;

	/* No TSO on this route: spare window means we are not limited. */
	if ((sk->sk_route_caps & NETIF_F_TSO) == 0)
		return 0;

	spare = tp->snd_cwnd - in_flight;

	if (!sysctl_tcp_tso_win_divisor)
		return spare <= tcp_max_burst(tp);

	return spare * sysctl_tcp_tso_win_divisor < tp->snd_cwnd;
}
static
__inline__
void
tcp_minshall_update
(
struct
tcp_sock
*
tp
,
int
mss
,
const
struct
sk_buff
*
skb
)
{
...
...
@@ -1157,6 +1207,15 @@ static inline void tcp_mib_init(void)
TCP_ADD_STATS_USER
(
TCP_MIB_MAXCONN
,
-
1
);
}
/* From STCP.
 *
 * Drop every cached sk_buff pointer hint kept in the tcp_sock by setting
 * each one to NULL.  Only the pointer hints are reset here; the
 * associated *_cnt_hint counters are left as they are.
 */
static inline void clear_all_retrans_hints(struct tcp_sock *tp)
{
	/* SACK fast-path position */
	tp->fastpath_skb_hint = NULL;

	/* loss marking / scoreboard positions */
	tp->scoreboard_skb_hint = NULL;
	tp->lost_skb_hint = NULL;

	/* retransmit queue walk positions */
	tp->forward_skb_hint = NULL;
	tp->retransmit_skb_hint = NULL;
}
/* /proc */
enum
tcp_seq_states
{
TCP_SEQ_STATE_LISTENING
,
...
...
net/ipv4/sysctl_net_ipv4.c
浏览文件 @
79ffeeb9
...
...
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
.
proc_handler
=
&
proc_tcp_congestion_control
,
.
strategy
=
&
sysctl_tcp_congestion_control
,
},
{
.
ctl_name
=
NET_TCP_ABC
,
.
procname
=
"tcp_abc"
,
.
data
=
&
sysctl_tcp_abc
,
.
maxlen
=
sizeof
(
int
),
.
mode
=
0644
,
.
proc_handler
=
&
proc_dointvec
,
},
{
.
ctl_name
=
0
}
};
...
...
net/ipv4/tcp.c
浏览文件 @
79ffeeb9
...
...
@@ -1640,7 +1640,7 @@ int tcp_disconnect(struct sock *sk, int flags)
}
else
if
(
tcp_need_reset
(
old_state
)
||
(
tp
->
snd_nxt
!=
tp
->
write_seq
&&
(
1
<<
old_state
)
&
(
TCPF_CLOSING
|
TCPF_LAST_ACK
)))
{
/* The last check adjusts for discrepanc
e
of Linux wrt. RFC
/* The last check adjusts for discrepanc
y
of Linux wrt. RFC
* states
*/
tcp_send_active_reset
(
sk
,
gfp_any
());
...
...
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp
->
packets_out
=
0
;
tp
->
snd_ssthresh
=
0x7fffffff
;
tp
->
snd_cwnd_cnt
=
0
;
tp
->
bytes_acked
=
0
;
tcp_set_ca_state
(
sk
,
TCP_CA_Open
);
tcp_clear_retrans
(
tp
);
inet_csk_delack_init
(
sk
);
...
...
net/ipv4/tcp_bic.c
浏览文件 @
79ffeeb9
...
...
@@ -217,17 +217,15 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack,
bictcp_low_utilization
(
sk
,
data_acked
);
if
(
in_flight
<
tp
->
snd_cwnd
)
if
(
!
tcp_is_cwnd_limited
(
sk
,
in_flight
)
)
return
;
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
{
/* In "safe" area, increase. */
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
}
else
{
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
tcp_slow_start
(
tp
);
else
{
bictcp_update
(
ca
,
tp
->
snd_cwnd
);
/* In dangerous area, increase slowly.
/* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
*/
if
(
tp
->
snd_cwnd_cnt
>=
ca
->
cnt
)
{
...
...
net/ipv4/tcp_cong.c
浏览文件 @
79ffeeb9
...
...
@@ -186,24 +186,32 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
{
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
if
(
in_flight
<
tp
->
snd_cwnd
)
if
(
!
tcp_is_cwnd_limited
(
sk
,
in_flight
)
)
return
;
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
{
/* In "safe" area, increase. */
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
}
else
{
/* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
*/
if
(
tp
->
snd_cwnd_cnt
>=
tp
->
snd_cwnd
)
{
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
tp
->
snd_cwnd_cnt
=
0
;
}
else
tp
->
snd_cwnd_cnt
++
;
}
/* In "safe" area, increase. */
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
tcp_slow_start
(
tp
);
/* In dangerous area, increase slowly. */
else
if
(
sysctl_tcp_abc
)
{
/* RFC3465: Appropriate Byte Count
* increase once for each full cwnd acked
*/
if
(
tp
->
bytes_acked
>=
tp
->
snd_cwnd
*
tp
->
mss_cache
)
{
tp
->
bytes_acked
-=
tp
->
snd_cwnd
*
tp
->
mss_cache
;
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
}
}
else
{
/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
if
(
tp
->
snd_cwnd_cnt
>=
tp
->
snd_cwnd
)
{
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
tp
->
snd_cwnd_cnt
=
0
;
}
else
tp
->
snd_cwnd_cnt
++
;
}
}
EXPORT_SYMBOL_GPL
(
tcp_reno_cong_avoid
);
...
...
net/ipv4/tcp_highspeed.c
浏览文件 @
79ffeeb9
...
...
@@ -111,18 +111,17 @@ static void hstcp_init(struct sock *sk)
}
static
void
hstcp_cong_avoid
(
struct
sock
*
sk
,
u32
adk
,
u32
rtt
,
u32
in_flight
,
int
goo
d
)
u32
in_flight
,
u32
pkts_acke
d
)
{
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
struct
hstcp
*
ca
=
inet_csk_ca
(
sk
);
if
(
in_flight
<
tp
->
snd_cwnd
)
if
(
!
tcp_is_cwnd_limited
(
sk
,
in_flight
)
)
return
;
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
{
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
}
else
{
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
tcp_slow_start
(
tp
);
else
{
/* Update AIMD parameters */
if
(
tp
->
snd_cwnd
>
hstcp_aimd_vals
[
ca
->
ai
].
cwnd
)
{
while
(
tp
->
snd_cwnd
>
hstcp_aimd_vals
[
ca
->
ai
].
cwnd
&&
...
...
net/ipv4/tcp_htcp.c
浏览文件 @
79ffeeb9
...
...
@@ -207,14 +207,13 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
struct
htcp
*
ca
=
inet_csk_ca
(
sk
);
if
(
in_flight
<
tp
->
snd_cwnd
)
if
(
!
tcp_is_cwnd_limited
(
sk
,
in_flight
)
)
return
;
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
{
/* In "safe" area, increase. */
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
}
else
{
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
tcp_slow_start
(
tp
);
else
{
measure_rtt
(
sk
);
/* keep track of number of round-trip times since last backoff event */
...
...
@@ -224,7 +223,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
htcp_alpha_update
(
ca
);
}
/* In dangerous area, increase slowly.
/* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
*/
if
((
tp
->
snd_cwnd_cnt
++
*
ca
->
alpha
)
>>
7
>=
tp
->
snd_cwnd
)
{
...
...
net/ipv4/tcp_hybla.c
浏览文件 @
79ffeeb9
...
...
@@ -100,12 +100,12 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
ca
->
minrtt
=
tp
->
srtt
;
}
if
(
!
tcp_is_cwnd_limited
(
sk
,
in_flight
))
return
;
if
(
!
ca
->
hybla_en
)
return
tcp_reno_cong_avoid
(
sk
,
ack
,
rtt
,
in_flight
,
flag
);
if
(
in_flight
<
tp
->
snd_cwnd
)
return
;
if
(
ca
->
rho
==
0
)
hybla_recalc_param
(
sk
);
...
...
net/ipv4/tcp_input.c
浏览文件 @
79ffeeb9
此差异已折叠。
点击以展开。
net/ipv4/tcp_ipv4.c
浏览文件 @
79ffeeb9
...
...
@@ -39,7 +39,7 @@
* request_sock handling and moved
* most of it into the af independent code.
* Added tail drop and some other bugfixes.
* Added new listen sematics.
* Added new listen sema
n
tics.
* Mike McLagan : Routing by source
* Juan Jose Ciarlante: ip_dynaddr bits
* Andi Kleen: various fixes.
...
...
@@ -1210,7 +1210,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
/* An explanation is required here, I think.
* Packet length and doff are validated by header prediction,
* provided case of th->doff==0 is elimin
e
ted.
* provided case of th->doff==0 is elimin
a
ted.
* So, we defer the checks. */
if
((
skb
->
ip_summed
!=
CHECKSUM_UNNECESSARY
&&
tcp_v4_checksum_init
(
skb
)))
...
...
net/ipv4/tcp_minisocks.c
浏览文件 @
79ffeeb9
...
...
@@ -158,7 +158,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
/* I am shamed, but failed to make it more elegant.
* Yes, it is direct reference to IP, which is impossible
* to generalize to IPv6. Taking into account that IPv6
* do not under
tsna
d recycling in any case, it not
* do not under
stan
d recycling in any case, it not
* a big problem in practice. --ANK */
if
(
tw
->
tw_family
==
AF_INET
&&
tcp_death_row
.
sysctl_tw_recycle
&&
tcptw
->
tw_ts_recent_stamp
&&
...
...
@@ -194,7 +194,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
/* In window segment, it may be only reset or bare ack. */
if
(
th
->
rst
)
{
/* This is TIME_WAIT assasination, in two flavors.
/* This is TIME_WAIT assas
s
ination, in two flavors.
* Oh well... nobody has a sufficient solution to this
* protocol bug yet.
*/
...
...
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
*/
newtp
->
snd_cwnd
=
2
;
newtp
->
snd_cwnd_cnt
=
0
;
newtp
->
bytes_acked
=
0
;
newtp
->
frto_counter
=
0
;
newtp
->
frto_highmark
=
0
;
...
...
@@ -550,7 +551,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
/* RFC793 page 36: "If the connection is in any non-synchronized state ...
* and the incoming segment acknowledges something not yet
* sent (the segment carries an unacc
a
ptable ACK) ...
* sent (the segment carries an unacc
e
ptable ACK) ...
* a reset is sent."
*
* Invalid ACK: reset will be sent by listening socket
...
...
net/ipv4/tcp_output.c
浏览文件 @
79ffeeb9
...
...
@@ -436,6 +436,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
u16
flags
;
BUG_ON
(
len
>
skb
->
len
);
clear_all_retrans_hints
(
tp
);
nsize
=
skb_headlen
(
skb
)
-
len
;
if
(
nsize
<
0
)
nsize
=
0
;
...
...
@@ -599,7 +601,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
for TCP options, but includes only bare TCP header.
tp->rx_opt.mss_clamp is mss negotiated at connection setup.
It is min
u
mum of user_mss and mss received with SYN.
It is min
i
mum of user_mss and mss received with SYN.
It also does not include TCP options.
tp->pmtu_cookie is last pmtu, seen by this function.
...
...
@@ -1171,7 +1173,7 @@ u32 __tcp_select_window(struct sock *sk)
{
struct
inet_connection_sock
*
icsk
=
inet_csk
(
sk
);
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
/* MSS for the peer's data. Previous verions used mss_clamp
/* MSS for the peer's data. Previous ver
s
ions used mss_clamp
* here. I don't know if the value based on our guesses
* of peer's MSS is better for the performance. It's more correct
* but may be worse for the performance because of rcv_mss
...
...
@@ -1260,7 +1262,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
BUG_ON
(
tcp_skb_pcount
(
skb
)
!=
1
||
tcp_skb_pcount
(
next_skb
)
!=
1
);
/* Ok. We will be able to collapse the packet. */
/* changing transmit queue under us so clear hints */
clear_all_retrans_hints
(
tp
);
/* Ok. We will be able to collapse the packet. */
__skb_unlink
(
next_skb
,
&
sk
->
sk_write_queue
);
memcpy
(
skb_put
(
skb
,
next_skb_size
),
next_skb
->
data
,
next_skb_size
);
...
...
@@ -1330,6 +1335,8 @@ void tcp_simple_retransmit(struct sock *sk)
}
}
clear_all_retrans_hints
(
tp
);
if
(
!
lost
)
return
;
...
...
@@ -1361,7 +1368,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
int
err
;
/* Do not send more than we queued. 1/4 is reserved for possible
* copying overhead: fr
g
agmentation, tunneling, mangling etc.
* copying overhead: fragmentation, tunneling, mangling etc.
*/
if
(
atomic_read
(
&
sk
->
sk_wmem_alloc
)
>
min
(
sk
->
sk_wmem_queued
+
(
sk
->
sk_wmem_queued
>>
2
),
sk
->
sk_sndbuf
))
...
...
@@ -1468,13 +1475,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
const
struct
inet_connection_sock
*
icsk
=
inet_csk
(
sk
);
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
struct
sk_buff
*
skb
;
int
packet_cnt
=
tp
->
lost_out
;
int
packet_cnt
;
if
(
tp
->
retransmit_skb_hint
)
{
skb
=
tp
->
retransmit_skb_hint
;
packet_cnt
=
tp
->
retransmit_cnt_hint
;
}
else
{
skb
=
sk
->
sk_write_queue
.
next
;
packet_cnt
=
0
;
}
/* First pass: retransmit lost packets. */
if
(
packet_cn
t
)
{
sk_stream_for_retrans_queue
(
skb
,
sk
)
{
if
(
tp
->
lost_ou
t
)
{
sk_stream_for_retrans_queue
_from
(
skb
,
sk
)
{
__u8
sacked
=
TCP_SKB_CB
(
skb
)
->
sacked
;
/* we could do better than to assign each time */
tp
->
retransmit_skb_hint
=
skb
;
tp
->
retransmit_cnt_hint
=
packet_cnt
;
/* Assume this retransmit will generate
* only one packet for congestion window
* calculation purposes. This works because
...
...
@@ -1485,10 +1504,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if
(
tcp_packets_in_flight
(
tp
)
>=
tp
->
snd_cwnd
)
return
;
if
(
sacked
&
TCPCB_LOST
)
{
if
(
sacked
&
TCPCB_LOST
)
{
if
(
!
(
sacked
&
(
TCPCB_SACKED_ACKED
|
TCPCB_SACKED_RETRANS
)))
{
if
(
tcp_retransmit_skb
(
sk
,
skb
))
if
(
tcp_retransmit_skb
(
sk
,
skb
))
{
tp
->
retransmit_skb_hint
=
NULL
;
return
;
}
if
(
icsk
->
icsk_ca_state
!=
TCP_CA_Loss
)
NET_INC_STATS_BH
(
LINUX_MIB_TCPFASTRETRANS
);
else
...
...
@@ -1501,8 +1522,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
TCP_RTO_MAX
);
}
packet_cnt
-
=
tcp_skb_pcount
(
skb
);
if
(
packet_cnt
<=
0
)
packet_cnt
+
=
tcp_skb_pcount
(
skb
);
if
(
packet_cnt
>=
tp
->
lost_out
)
break
;
}
}
...
...
@@ -1528,9 +1549,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if
(
tcp_may_send_now
(
sk
,
tp
))
return
;
packet_cnt
=
0
;
if
(
tp
->
forward_skb_hint
)
{
skb
=
tp
->
forward_skb_hint
;
packet_cnt
=
tp
->
forward_cnt_hint
;
}
else
{
skb
=
sk
->
sk_write_queue
.
next
;
packet_cnt
=
0
;
}
sk_stream_for_retrans_queue_from
(
skb
,
sk
)
{
tp
->
forward_cnt_hint
=
packet_cnt
;
tp
->
forward_skb_hint
=
skb
;
sk_stream_for_retrans_queue
(
skb
,
sk
)
{
/* Similar to the retransmit loop above we
* can pretend that the retransmitted SKB
* we send out here will be composed of one
...
...
@@ -1547,8 +1577,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
continue
;
/* Ok, retransmit it. */
if
(
tcp_retransmit_skb
(
sk
,
skb
))
if
(
tcp_retransmit_skb
(
sk
,
skb
))
{
tp
->
forward_skb_hint
=
NULL
;
break
;
}
if
(
skb
==
skb_peek
(
&
sk
->
sk_write_queue
))
inet_csk_reset_xmit_timer
(
sk
,
ICSK_TIME_RETRANS
,
...
...
@@ -2058,3 +2090,4 @@ EXPORT_SYMBOL(tcp_connect);
EXPORT_SYMBOL
(
tcp_make_synack
);
EXPORT_SYMBOL
(
tcp_simple_retransmit
);
EXPORT_SYMBOL
(
tcp_sync_mss
);
EXPORT_SYMBOL
(
sysctl_tcp_tso_win_divisor
);
net/ipv4/tcp_scalable.c
浏览文件 @
79ffeeb9
...
...
@@ -20,20 +20,20 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
u32
in_flight
,
int
flag
)
{
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
if
(
in_flight
<
tp
->
snd_cwnd
)
if
(
!
tcp_is_cwnd_limited
(
sk
,
in_flight
))
return
;
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
{
t
p
->
snd_cwnd
++
;
}
else
{
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
t
cp_slow_start
(
tp
)
;
else
{
tp
->
snd_cwnd_cnt
++
;
if
(
tp
->
snd_cwnd_cnt
>
min
(
tp
->
snd_cwnd
,
TCP_SCALABLE_AI_CNT
)){
tp
->
snd_cwnd
++
;
if
(
tp
->
snd_cwnd
<
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
++
;
tp
->
snd_cwnd_cnt
=
0
;
}
}
tp
->
snd_cwnd
=
min_t
(
u32
,
tp
->
snd_cwnd
,
tp
->
snd_cwnd_clamp
);
tp
->
snd_cwnd_stamp
=
tcp_time_stamp
;
}
static
u32
tcp_scalable_ssthresh
(
struct
sock
*
sk
)
...
...
net/ipv4/tcp_timer.c
浏览文件 @
79ffeeb9
...
...
@@ -58,7 +58,7 @@ static void tcp_write_err(struct sock *sk)
* to prevent DoS attacks. It is called when a retransmission timeout
* or zero probe timeout occurs on orphaned socket.
*
* Criteri
um
is still not confirmed experimentally and may change.
* Criteri
a
is still not confirmed experimentally and may change.
* We kill the socket, if:
* 1. If number of orphaned sockets exceeds an administratively configured
* limit.
...
...
@@ -132,7 +132,7 @@ static int tcp_write_timeout(struct sock *sk)
hole detection. :-(
It is place to make it. It is not made. I do not want
to make it. It is disgu
i
sting. It does not work in any
to make it. It is disgusting. It does not work in any
case. Let me to cite the same draft, which requires for
us to implement this:
...
...
net/ipv4/tcp_vegas.c
浏览文件 @
79ffeeb9
...
...
@@ -236,8 +236,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
/* We don't have enough RTT samples to do the Vegas
* calculation, so we'll behave like Reno.
*/
if
(
tp
->
snd_cwnd
>
tp
->
snd_ssthresh
)
tp
->
snd_cwnd
++
;
tcp_reno_cong_avoid
(
sk
,
ack
,
seq_rtt
,
in_flight
,
cnt
);
}
else
{
u32
rtt
,
target_cwnd
,
diff
;
...
...
@@ -275,7 +274,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
*/
diff
=
(
old_wnd
<<
V_PARAM_SHIFT
)
-
target_cwnd
;
if
(
tp
->
snd_cwnd
<
tp
->
snd_ssthresh
)
{
if
(
tp
->
snd_cwnd
<
=
tp
->
snd_ssthresh
)
{
/* Slow start. */
if
(
diff
>
gamma
)
{
/* Going too fast. Time to slow down
...
...
@@ -295,6 +294,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
V_PARAM_SHIFT
)
+
1
);
}
tcp_slow_start
(
tp
);
}
else
{
/* Congestion avoidance. */
u32
next_snd_cwnd
;
...
...
@@ -327,37 +327,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
else
if
(
next_snd_cwnd
<
tp
->
snd_cwnd
)
tp
->
snd_cwnd
--
;
}
}
/* Wipe the slate clean for the next RTT. */
vegas
->
cntRTT
=
0
;
vegas
->
minRTT
=
0x7fffffff
;
if
(
tp
->
snd_cwnd
<
2
)
tp
->
snd_cwnd
=
2
;
else
if
(
tp
->
snd_cwnd
>
tp
->
snd_cwnd_clamp
)
tp
->
snd_cwnd
=
tp
->
snd_cwnd_clamp
;
}
}
/* The following code is executed for every ack we receive,
* except for conditions checked in should_advance_cwnd()
* before the call to tcp_cong_avoid(). Mainly this means that
* we only execute this code if the ack actually acked some
* data.
*/
/* If we are in slow start, increase our cwnd in response to this ACK.
* (If we are not in slow start then we are in congestion avoidance,
* and adjust our congestion window only once per RTT. See the code
* above.)
*/
if
(
tp
->
snd_cwnd
<=
tp
->
snd_ssthresh
)
tp
->
snd_cwnd
++
;
/* to keep cwnd from growing without bound */
tp
->
snd_cwnd
=
min_t
(
u32
,
tp
->
snd_cwnd
,
tp
->
snd_cwnd_clamp
);
/* Make sure that we are never so timid as to reduce our cwnd below
* 2 MSS.
*
* Going below 2 MSS would risk huge delayed ACKs from our receiver.
*/
tp
->
snd_cwnd
=
max
(
tp
->
snd_cwnd
,
2U
);
/* Wipe the slate clean for the next RTT. */
vegas
->
cntRTT
=
0
;
vegas
->
minRTT
=
0x7fffffff
;
}
/* Extract info for Tcp socket info provided via netlink. */
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录