Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
openanolis
cloud-kernel
提交
2d43f112
cloud-kernel
项目概览
openanolis
/
cloud-kernel
大约 1 年 前同步成功
通知
156
Star
36
Fork
7
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
10
列表
看板
标记
里程碑
合并请求
2
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
cloud-kernel
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
10
Issue
10
列表
看板
标记
里程碑
合并请求
2
合并请求
2
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
2d43f112
编写于
11月 05, 2005
作者:
A
Arnaldo Carvalho de Melo
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'red' of 84.73.165.173:/home/tgr/repos/net-2.6
上级
6df71634
bdc450a0
变更
5
展开全部
隐藏空白更改
内联
并排
Showing
5 changed files
with
891 additions
and
771 deletions
+891
-771
include/linux/pkt_sched.h
include/linux/pkt_sched.h
+24
-26
include/net/inet_ecn.h
include/net/inet_ecn.h
+24
-4
include/net/red.h
include/net/red.h
+325
-0
net/sched/sch_gred.c
net/sched/sch_gred.c
+411
-430
net/sched/sch_red.c
net/sched/sch_red.c
+107
-311
未找到文件。
include/linux/pkt_sched.h
浏览文件 @
2d43f112
...
...
@@ -93,6 +93,7 @@ struct tc_fifo_qopt
/* PRIO section */
#define TCQ_PRIO_BANDS 16
#define TCQ_MIN_PRIO_BANDS 2
struct
tc_prio_qopt
{
...
...
@@ -169,6 +170,7 @@ struct tc_red_qopt
unsigned
char
Scell_log
;
/* cell size for idle damping */
unsigned
char
flags
;
#define TC_RED_ECN 1
#define TC_RED_HARDDROP 2
};
struct
tc_red_xstats
...
...
@@ -194,38 +196,34 @@ enum
#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
#define TCA_SET_OFF TCA_GRED_PARMS
struct
tc_gred_qopt
{
__u32
limit
;
/* HARD maximal queue length (bytes)
*/
__u32
qth_min
;
/* Min average length threshold (bytes)
*/
__u32
qth_max
;
/* Max average length threshold (bytes)
*/
__u32
DP
;
/* upto 2^32 DPs */
__u32
backlog
;
__u32
qave
;
__u32
forced
;
__u32
early
;
__u32
other
;
__u32
pdrop
;
unsigned
char
Wlog
;
/* log(W) */
unsigned
char
Plog
;
/* log(P_max/(qth_max-qth_min)) */
unsigned
char
Scell_log
;
/* cell size for idle damping */
__u8
prio
;
/* prio of this VQ */
__u32
packets
;
__u32
bytesin
;
__u32
limit
;
/* HARD maximal queue length (bytes) */
__u32
qth_min
;
/* Min average length threshold (bytes) */
__u32
qth_max
;
/* Max average length threshold (bytes) */
__u32
DP
;
/* upto 2^32 DPs */
__u32
backlog
;
__u32
qave
;
__u32
forced
;
__u32
early
;
__u32
other
;
__u32
pdrop
;
__u8
Wlog
;
/* log(W) */
__u8
Plog
;
/* log(P_max/(qth_max-qth_min)) */
__u8
Scell_log
;
/* cell size for idle damping */
__u8
prio
;
/* prio of this VQ */
__u32
packets
;
__u32
bytesin
;
};
/* gred setup */
struct
tc_gred_sopt
{
__u32
DPs
;
__u32
def_DP
;
__u8
grio
;
__u8
pad1
;
__u16
pad2
;
__u32
DPs
;
__u32
def_DP
;
__u8
grio
;
__u8
flags
;
__u16
pad1
;
};
/* HTB section */
...
...
include/net/inet_ecn.h
浏览文件 @
2d43f112
...
...
@@ -2,6 +2,7 @@
#define _INET_ECN_H_
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <net/dsfield.h>
enum
{
...
...
@@ -48,7 +49,7 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
(label) |= __constant_htons(INET_ECN_ECT_0 << 4); \
} while (0)
static
inline
void
IP_ECN_set_ce
(
struct
iphdr
*
iph
)
static
inline
int
IP_ECN_set_ce
(
struct
iphdr
*
iph
)
{
u32
check
=
iph
->
check
;
u32
ecn
=
(
iph
->
tos
+
1
)
&
INET_ECN_MASK
;
...
...
@@ -61,7 +62,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph)
* INET_ECN_CE => 00
*/
if
(
!
(
ecn
&
2
))
return
;
return
!
ecn
;
/*
* The following gives us:
...
...
@@ -72,6 +73,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph)
iph
->
check
=
check
+
(
check
>=
0xFFFF
);
iph
->
tos
|=
INET_ECN_CE
;
return
1
;
}
static
inline
void
IP_ECN_clear
(
struct
iphdr
*
iph
)
...
...
@@ -87,11 +89,12 @@ static inline void ipv4_copy_dscp(struct iphdr *outer, struct iphdr *inner)
struct
ipv6hdr
;
static
inline
void
IP6_ECN_set_ce
(
struct
ipv6hdr
*
iph
)
static
inline
int
IP6_ECN_set_ce
(
struct
ipv6hdr
*
iph
)
{
if
(
INET_ECN_is_not_ect
(
ipv6_get_dsfield
(
iph
)))
return
;
return
0
;
*
(
u32
*
)
iph
|=
htonl
(
INET_ECN_CE
<<
20
);
return
1
;
}
static
inline
void
IP6_ECN_clear
(
struct
ipv6hdr
*
iph
)
...
...
@@ -105,4 +108,21 @@ static inline void ipv6_copy_dscp(struct ipv6hdr *outer, struct ipv6hdr *inner)
ipv6_change_dsfield
(
inner
,
INET_ECN_MASK
,
dscp
);
}
static inline int INET_ECN_set_ce(struct sk_buff *skb)
{
	/* Set the Congestion Experienced codepoint on an IPv4 or IPv6
	 * packet, dispatching on skb->protocol.
	 *
	 * Returns 1 if CE was set, 0 otherwise (non-IP protocol, header
	 * not fully inside the linear data area, or the per-family
	 * helper declined, e.g. Not-ECT traffic).
	 */
	switch (skb->protocol) {
	case __constant_htons(ETH_P_IP):
		/* Only touch the header if a complete iphdr lies within
		 * [nh.raw, tail). */
		if (skb->nh.raw + sizeof(struct iphdr) <= skb->tail)
			return IP_ECN_set_ce(skb->nh.iph);
		break;
	case __constant_htons(ETH_P_IPV6):
		if (skb->nh.raw + sizeof(struct ipv6hdr) <= skb->tail)
			return IP6_ECN_set_ce(skb->nh.ipv6h);
		break;
	}
	return 0;
}
#endif
include/net/red.h
0 → 100644
浏览文件 @
2d43f112
#ifndef __NET_SCHED_RED_H
#define __NET_SCHED_RED_H
#include <linux/config.h>
#include <linux/types.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/dsfield.h>
/* Random Early Detection (RED) algorithm.
=======================================
Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
This file codes a "divisionless" version of RED algorithm
as written down in Fig.17 of the paper.
Short description.
------------------
When a new packet arrives we calculate the average queue length:
avg = (1-W)*avg + W*current_queue_len,
W is the filter time constant (chosen as 2^(-Wlog)), it controls
the inertia of the algorithm. To allow larger bursts, W should be
decreased.
if (avg > th_max) -> packet marked (dropped).
if (avg < th_min) -> packet passes.
if (th_min < avg < th_max) we calculate probability:
Pb = max_P * (avg - th_min)/(th_max-th_min)
and mark (drop) packet with this probability.
Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
max_P should be small (not 1), usually 0.01..0.02 is good value.
max_P is chosen as a number, so that max_P/(th_max-th_min)
is a negative power of two in order arithmetics to contain
only shifts.
Parameters, settable by user:
-----------------------------
qth_min - bytes (should be < qth_max/2)
qth_max - bytes (should be at least 2*qth_min and less limit)
Wlog - bits (<32) log(1/W).
Plog - bits (<32)
Plog is related to max_P by formula:
max_P = (qth_max-qth_min)/2^Plog;
F.e. if qth_max=128K and qth_min=32K, then Plog=22
corresponds to max_P=0.02
Scell_log
Stab
Lookup table for log((1-W)^(t/t_ave).
NOTES:
Upper bound on W.
-----------------
If you want to allow bursts of L packets of size S,
you should choose W:
L + 1 - th_min/S < (1-(1-W)^L)/W
th_min/S = 32 th_min/S = 4
log(W) L
-1 33
-2 35
-3 39
-4 46
-5 57
-6 75
-7 101
-8 135
-9 190
etc.
*/
#define RED_STAB_SIZE 256
#define RED_STAB_MASK (RED_STAB_SIZE - 1)
/* Per-queue RED bookkeeping counters, reported to userspace. */
struct red_stats
{
	u32		prob_drop;	/* Early probability drops */
	u32		prob_mark;	/* Early probability marks */
	u32		forced_drop;	/* Forced drops, qavg > max_thresh */
	u32		forced_mark;	/* Forced marks, qavg > max_thresh */
	u32		pdrop;		/* Drops due to queue limits */
	u32		other;		/* Drops due to drop() calls */
	u32		backlog;	/* NOTE(review): not updated by this
					   header; presumably maintained by
					   the qdisc -- confirm in callers */
};
/* Parameter block plus averaging state shared by the RED/GRED qdiscs. */
struct red_parms
{
	/* Parameters */
	u32		qth_min;	/* Min avg length threshold: A scaled */
	u32		qth_max;	/* Max avg length threshold: A scaled */
	u32		Scell_max;	/* Idle-time clamp: 255 << Scell_log */
	u32		Rmask;		/* Cached random mask, see red_rmask */
	u8		Scell_log;	/* cell size (log2) for idle damping */
	u8		Wlog;		/* log(W) */
	u8		Plog;		/* random number bits */
	u8		Stab[RED_STAB_SIZE];	/* lookup table for the idle
						   decay, see red.h header
						   comment */

	/* Variables */
	int		qcount;		/* Number of packets since last random
					   number generation */
	u32		qR;		/* Cached random number */

	unsigned long	qavg;		/* Average queue length: A scaled */
	psched_time_t	qidlestart;	/* Start of current idle period */
};
static inline u32 red_rmask(u8 Plog)
{
	/* Mask selecting Plog random bits: 2^Plog - 1, or all ones when
	 * Plog is out of range.
	 *
	 * Use an unsigned constant: with a signed literal, Plog == 31
	 * would left-shift into the sign bit of int, which is undefined
	 * behavior in C.
	 */
	return Plog < 32 ? ((1U << Plog) - 1) : ~0U;
}
static inline void red_set_parms(struct red_parms *p,
				 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
				 u8 Scell_log, u8 *stab)
{
	/* Install a new parameter set; stab must point at at least
	 * RED_STAB_SIZE bytes (copied, not referenced).
	 *
	 * Reset average queue length, the value is strictly bound
	 * to the parameters below, reseting hurts a bit but leaving
	 * it might result in an unreasonable qavg for a while. --TGR
	 */
	p->qavg		= 0;
	p->qcount	= -1;
	/* Thresholds are stored pre-scaled by the fixed-point factor
	 * 2^Wlog so they compare directly against qavg. */
	p->qth_min	= qth_min << Wlog;
	p->qth_max	= qth_max << Wlog;
	p->Wlog		= Wlog;
	p->Plog		= Plog;
	p->Rmask	= red_rmask(Plog);
	p->Scell_log	= Scell_log;
	p->Scell_max	= (255 << Scell_log);

	memcpy(p->Stab, stab, sizeof(p->Stab));
}
static inline int red_is_idling(struct red_parms *p)
{
	/* Non-zero while an idle period is being timed, i.e. the idle
	 * start stamp has not been cleared back to "past perfect".
	 */
	return PSCHED_IS_PASTPERFECT(p->qidlestart) ? 0 : 1;
}
static inline void red_start_of_idle_period(struct red_parms *p)
{
	/* Stamp the beginning of an idle period with the current time;
	 * red_is_idling() becomes true until the stamp is cleared. */
	PSCHED_GET_TIME(p->qidlestart);
}
static inline void red_end_of_idle_period(struct red_parms *p)
{
	/* Clear the idle stamp; red_is_idling() becomes false. */
	PSCHED_SET_PASTPERFECT(p->qidlestart);
}
static inline void red_restart(struct red_parms *p)
{
	/* Forget all averaging state: leave any idle period and start
	 * again from a zero average, as after red_set_parms().
	 */
	red_end_of_idle_period(p);
	p->qcount = -1;
	p->qavg = 0;
}
static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
{
	/* Decay the cached average over the time the queue spent idle,
	 * using the precomputed Stab shift table.  Returns the decayed
	 * average; does not modify *p apart from reading it.
	 */
	psched_time_t now;
	long us_idle;
	int  shift;

	PSCHED_GET_TIME(now);
	/* Idle time, clamped to Scell_max so the table index and the
	 * linear approximation below stay in range. */
	us_idle = PSCHED_TDIFF_SAFE(now, p->qidlestart, p->Scell_max);

	/*
	 * The problem: ideally, average length queue recalcultion should
	 * be done over constant clock intervals. This is too expensive, so
	 * that the calculation is driven by outgoing packets.
	 * When the queue is idle we have to model this clock by hand.
	 *
	 * SF+VJ proposed to "generate":
	 *
	 *	m = idletime / (average_pkt_size / bandwidth)
	 *
	 * dummy packets as a burst after idle time, i.e.
	 *
	 *	p->qavg *= (1-W)^m
	 *
	 * This is an apparently overcomplicated solution (f.e. we have to
	 * precompute a table to make this calculation in reasonable time)
	 * I believe that a simpler model may be used here,
	 * but it is field for experiments.
	 */
	shift = p->Stab[(us_idle >> p->Scell_log) & RED_STAB_MASK];

	if (shift)
		return p->qavg >> shift;
	else {
		/* Approximate initial part of exponent with linear function:
		 *
		 *	(1-W)^m ~= 1-mW + ...
		 *
		 * Seems, it is the best solution to
		 * problem of too coarse exponent tabulation.
		 */
		us_idle = (p->qavg * us_idle) >> p->Scell_log;

		if (us_idle < (p->qavg >> 1))
			return p->qavg - us_idle;
		else
			return p->qavg >> 1;
	}
}
static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
						       unsigned int backlog)
{
	/*
	 * Fold the current backlog sample into the EWMA.
	 *
	 * NOTE: p->qavg is fixed point number with point at Wlog.
	 * The formula below is equivalent to floating point
	 * version:
	 *
	 *	qavg = qavg*(1-W) + backlog*W;
	 *
	 * --ANK (980924)
	 */
	return p->qavg + (backlog - (p->qavg >> p->Wlog));
}
static inline unsigned long red_calc_qavg(struct red_parms *p,
					  unsigned int backlog)
{
	/* Compute the new average queue length, choosing the update
	 * rule that matches the queue state: decay over the elapsed
	 * idle time, or fold in the current backlog sample.
	 */
	if (red_is_idling(p))
		return red_calc_qavg_from_idle_time(p);
	return red_calc_qavg_no_idle_time(p, backlog);
}
static inline u32 red_random(struct red_parms *p)
{
	/* Uniform random value in [0, Rmask], i.e. Plog random bits. */
	return net_random() & p->Rmask;
}
static inline int red_mark_probability(struct red_parms *p, unsigned long qavg)
{
	/* Non-zero when the packet should be marked, given the current
	 * average and the packets-since-last-mark counter qcount.
	 *
	 * The formula used below causes questions.

	   OK. qR is random number in the interval 0..Rmask
	   i.e. 0..(2^Plog). If we used floating point
	   arithmetics, it would be: (2^Plog)*rnd_num,
	   where rnd_num is less 1.

	   Taking into account, that qavg have fixed
	   point at Wlog, and Plog is related to max_P by
	   max_P = (qth_max-qth_min)/2^Plog; two lines
	   below have the following floating point equivalent:

	   max_P*(qavg - qth_min)/(qth_max-qth_min) < rnd/qcount

	   Any questions? --ANK (980924)
	 */
	return !(((qavg - p->qth_min) >> p->Wlog) * p->qcount < p->qR);
}
enum
{
RED_BELOW_MIN_THRESH
,
RED_BETWEEN_TRESH
,
RED_ABOVE_MAX_TRESH
,
};
static inline int red_cmp_thresh(struct red_parms *p, unsigned long qavg)
{
	/* Classify the scaled average queue length against the scaled
	 * min/max thresholds; see the RED_*_THRESH enum above.
	 */
	if (qavg >= p->qth_max)
		return RED_ABOVE_MAX_TRESH;
	if (qavg < p->qth_min)
		return RED_BELOW_MIN_THRESH;
	return RED_BETWEEN_TRESH;
}
enum
{
RED_DONT_MARK
,
RED_PROB_MARK
,
RED_HARD_MARK
,
};
static inline int red_action(struct red_parms *p, unsigned long qavg)
{
	/* Core RED decision: given the current average queue length,
	 * decide whether this packet is passed, probabilistically
	 * marked, or hard marked.  Updates the sampling state
	 * (p->qcount, p->qR) as a side effect.
	 */
	switch (red_cmp_thresh(p, qavg)) {
		case RED_BELOW_MIN_THRESH:
			/* Below qth_min: disarm the counter so the next
			 * between-thresholds packet redraws qR. */
			p->qcount = -1;
			return RED_DONT_MARK;

		case RED_BETWEEN_TRESH:
			if (++p->qcount) {
				if (red_mark_probability(p, qavg)) {
					p->qcount = 0;
					p->qR = red_random(p);
					return RED_PROB_MARK;
				}
			} else
				/* First packet of a new sampling run:
				 * draw a fresh random threshold. */
				p->qR = red_random(p);

			return RED_DONT_MARK;

		case RED_ABOVE_MAX_TRESH:
			p->qcount = -1;
			return RED_HARD_MARK;
	}

	/* red_cmp_thresh() returns only the three values above. */
	BUG();
	return RED_DONT_MARK;
}
#endif
net/sched/sch_gred.c
浏览文件 @
2d43f112
此差异已折叠。
点击以展开。
net/sched/sch_red.c
浏览文件 @
2d43f112
...
...
@@ -9,76 +9,23 @@
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* Changes:
* J Hadi Salim
<hadi@nortel.com>
980914: computation fixes
* J Hadi Salim 980914: computation fixes
* Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
* J Hadi Salim
<hadi@nortelnetworks.com> 980816: ECN support
* J Hadi Salim
980816: ECN support
*/
#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/
dsfiel
d.h>
#include <net/
re
d.h>
/* Random Early Detection (RED) algorithm.
=======================================
Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
This file codes a "divisionless" version of RED algorithm
as written down in Fig.17 of the paper.
Short description.
------------------
When a new packet arrives we calculate the average queue length:
avg = (1-W)*avg + W*current_queue_len,
W is the filter time constant (chosen as 2^(-Wlog)), it controls
the inertia of the algorithm. To allow larger bursts, W should be
decreased.
if (avg > th_max) -> packet marked (dropped).
if (avg < th_min) -> packet passes.
if (th_min < avg < th_max) we calculate probability:
Pb = max_P * (avg - th_min)/(th_max-th_min)
and mark (drop) packet with this probability.
Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
max_P should be small (not 1), usually 0.01..0.02 is good value.
max_P is chosen as a number, so that max_P/(th_max-th_min)
is a negative power of two in order arithmetics to contain
only shifts.
Parameters, settable by user:
/* Parameters, settable by user:
-----------------------------
limit - bytes (must be > qth_max + burst)
...
...
@@ -89,243 +36,93 @@ Short description.
arbitrarily high (well, less than ram size)
Really, this limit will never be reached
if RED works correctly.
qth_min - bytes (should be < qth_max/2)
qth_max - bytes (should be at least 2*qth_min and less limit)
Wlog - bits (<32) log(1/W).
Plog - bits (<32)
Plog is related to max_P by formula:
max_P = (qth_max-qth_min)/2^Plog;
F.e. if qth_max=128K and qth_min=32K, then Plog=22
corresponds to max_P=0.02
Scell_log
Stab
Lookup table for log((1-W)^(t/t_ave).
NOTES:
Upper bound on W.
-----------------
If you want to allow bursts of L packets of size S,
you should choose W:
L + 1 - th_min/S < (1-(1-W)^L)/W
th_min/S = 32 th_min/S = 4
log(W) L
-1 33
-2 35
-3 39
-4 46
-5 57
-6 75
-7 101
-8 135
-9 190
etc.
*/
struct
red_sched_data
{
/* Parameters */
u32
limit
;
/* HARD maximal queue length */
u32
qth_min
;
/* Min average length threshold: A scaled */
u32
qth_max
;
/* Max average length threshold: A scaled */
u32
Rmask
;
u32
Scell_max
;
unsigned
char
flags
;
char
Wlog
;
/* log(W) */
char
Plog
;
/* random number bits */
char
Scell_log
;
u8
Stab
[
256
];
/* Variables */
unsigned
long
qave
;
/* Average queue length: A scaled */
int
qcount
;
/* Packets since last random number generation */
u32
qR
;
/* Cached random number */
psched_time_t
qidlestart
;
/* Start of idle period */
struct
tc_red_xstats
st
;
u32
limit
;
/* HARD maximal queue length */
unsigned
char
flags
;
struct
red_parms
parms
;
struct
red_stats
stats
;
};
static
in
t
red_ecn_mark
(
struct
sk_buff
*
skb
)
static
in
line
int
red_use_ecn
(
struct
red_sched_data
*
q
)
{
if
(
skb
->
nh
.
raw
+
20
>
skb
->
tail
)
return
0
;
switch
(
skb
->
protocol
)
{
case
__constant_htons
(
ETH_P_IP
):
if
(
INET_ECN_is_not_ect
(
skb
->
nh
.
iph
->
tos
))
return
0
;
IP_ECN_set_ce
(
skb
->
nh
.
iph
);
return
1
;
case
__constant_htons
(
ETH_P_IPV6
):
if
(
INET_ECN_is_not_ect
(
ipv6_get_dsfield
(
skb
->
nh
.
ipv6h
)))
return
0
;
IP6_ECN_set_ce
(
skb
->
nh
.
ipv6h
);
return
1
;
default:
return
0
;
}
return
q
->
flags
&
TC_RED_ECN
;
}
static
int
red_enqueue
(
struct
sk_buff
*
skb
,
struct
Qdisc
*
sch
)
static
inline
int
red_use_harddrop
(
struct
red_sched_data
*
q
)
{
return
q
->
flags
&
TC_RED_HARDDROP
;
}
static
int
red_enqueue
(
struct
sk_buff
*
skb
,
struct
Qdisc
*
sch
)
{
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
psched_time_t
now
;
q
->
parms
.
qavg
=
red_calc_qavg
(
&
q
->
parms
,
sch
->
qstats
.
backlog
)
;
if
(
!
PSCHED_IS_PASTPERFECT
(
q
->
qidlestart
))
{
long
us_idle
;
int
shift
;
if
(
red_is_idling
(
&
q
->
parms
))
red_end_of_idle_period
(
&
q
->
parms
);
PSCHED_GET_TIME
(
now
);
us_idle
=
PSCHED_TDIFF_SAFE
(
now
,
q
->
qidlestart
,
q
->
Scell_max
);
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
)
;
switch
(
red_action
(
&
q
->
parms
,
q
->
parms
.
qavg
))
{
case
RED_DONT_MARK
:
break
;
/*
The problem: ideally, average length queue recalcultion should
be done over constant clock intervals. This is too expensive, so that
the calculation is driven by outgoing packets.
When the queue is idle we have to model this clock by hand.
SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth)
dummy packets as a burst after idle time, i.e.
q->qave *= (1-W)^m
This is an apparently overcomplicated solution (f.e. we have to precompute
a table to make this calculation in reasonable time)
I believe that a simpler model may be used here,
but it is field for experiments.
*/
shift
=
q
->
Stab
[
us_idle
>>
q
->
Scell_log
];
if
(
shift
)
{
q
->
qave
>>=
shift
;
}
else
{
/* Approximate initial part of exponent
with linear function:
(1-W)^m ~= 1-mW + ...
Seems, it is the best solution to
problem of too coarce exponent tabulation.
*/
us_idle
=
(
q
->
qave
*
us_idle
)
>>
q
->
Scell_log
;
if
(
us_idle
<
q
->
qave
/
2
)
q
->
qave
-=
us_idle
;
else
q
->
qave
>>=
1
;
}
}
else
{
q
->
qave
+=
sch
->
qstats
.
backlog
-
(
q
->
qave
>>
q
->
Wlog
);
/* NOTE:
q->qave is fixed point number with point at Wlog.
The formulae above is equvalent to floating point
version:
qave = qave*(1-W) + sch->qstats.backlog*W;
--ANK (980924)
*/
}
case
RED_PROB_MARK
:
sch
->
qstats
.
overlimits
++
;
if
(
!
red_use_ecn
(
q
)
||
!
INET_ECN_set_ce
(
skb
))
{
q
->
stats
.
prob_drop
++
;
goto
congestion_drop
;
}
if
(
q
->
qave
<
q
->
qth_min
)
{
q
->
qcount
=
-
1
;
enqueue:
if
(
sch
->
qstats
.
backlog
+
skb
->
len
<=
q
->
limit
)
{
__skb_queue_tail
(
&
sch
->
q
,
skb
);
sch
->
qstats
.
backlog
+=
skb
->
len
;
sch
->
bstats
.
bytes
+=
skb
->
len
;
sch
->
bstats
.
packets
++
;
return
NET_XMIT_SUCCESS
;
}
else
{
q
->
st
.
pdrop
++
;
}
kfree_skb
(
skb
);
sch
->
qstats
.
drops
++
;
return
NET_XMIT_DROP
;
}
if
(
q
->
qave
>=
q
->
qth_max
)
{
q
->
qcount
=
-
1
;
sch
->
qstats
.
overlimits
++
;
mark:
if
(
!
(
q
->
flags
&
TC_RED_ECN
)
||
!
red_ecn_mark
(
skb
))
{
q
->
st
.
early
++
;
goto
drop
;
}
q
->
st
.
marked
++
;
goto
enqueue
;
}
q
->
stats
.
prob_mark
++
;
break
;
case
RED_HARD_MARK
:
sch
->
qstats
.
overlimits
++
;
if
(
red_use_harddrop
(
q
)
||
!
red_use_ecn
(
q
)
||
!
INET_ECN_set_ce
(
skb
))
{
q
->
stats
.
forced_drop
++
;
goto
congestion_drop
;
}
if
(
++
q
->
qcount
)
{
/* The formula used below causes questions.
OK. qR is random number in the interval 0..Rmask
i.e. 0..(2^Plog). If we used floating point
arithmetics, it would be: (2^Plog)*rnd_num,
where rnd_num is less 1.
Taking into account, that qave have fixed
point at Wlog, and Plog is related to max_P by
max_P = (qth_max-qth_min)/2^Plog; two lines
below have the following floating point equivalent:
max_P*(qave - qth_min)/(qth_max-qth_min) < rnd/qcount
Any questions? --ANK (980924)
*/
if
(((
q
->
qave
-
q
->
qth_min
)
>>
q
->
Wlog
)
*
q
->
qcount
<
q
->
qR
)
goto
enqueue
;
q
->
qcount
=
0
;
q
->
qR
=
net_random
()
&
q
->
Rmask
;
sch
->
qstats
.
overlimits
++
;
goto
mark
;
q
->
stats
.
forced_mark
++
;
break
;
}
q
->
qR
=
net_random
()
&
q
->
Rmask
;
goto
enqueue
;
drop:
kfree_skb
(
skb
);
sch
->
qstats
.
drops
++
;
if
(
sch
->
qstats
.
backlog
+
skb
->
len
<=
q
->
limit
)
return
qdisc_enqueue_tail
(
skb
,
sch
);
q
->
stats
.
pdrop
++
;
return
qdisc_drop
(
skb
,
sch
);
congestion_drop:
qdisc_drop
(
skb
,
sch
);
return
NET_XMIT_CN
;
}
static
int
red_requeue
(
struct
sk_buff
*
skb
,
struct
Qdisc
*
sch
)
static
int
red_requeue
(
struct
sk_buff
*
skb
,
struct
Qdisc
*
sch
)
{
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
);
if
(
red_is_idling
(
&
q
->
parms
))
red_end_of_idle_period
(
&
q
->
parms
);
__skb_queue_head
(
&
sch
->
q
,
skb
);
sch
->
qstats
.
backlog
+=
skb
->
len
;
sch
->
qstats
.
requeues
++
;
return
0
;
return
qdisc_requeue
(
skb
,
sch
);
}
static
struct
sk_buff
*
red_dequeue
(
struct
Qdisc
*
sch
)
static
struct
sk_buff
*
red_dequeue
(
struct
Qdisc
*
sch
)
{
struct
sk_buff
*
skb
;
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
skb
=
__skb_dequeue
(
&
sch
->
q
);
if
(
skb
)
{
sch
->
qstats
.
backlog
-=
skb
->
len
;
return
skb
;
}
PSCHED_GET_TIME
(
q
->
qidlestart
);
return
NULL
;
skb
=
qdisc_dequeue_head
(
sch
);
if
(
skb
==
NULL
&&
!
red_is_idling
(
&
q
->
parms
))
red_start_of_idle_period
(
&
q
->
parms
);
return
skb
;
}
static
unsigned
int
red_drop
(
struct
Qdisc
*
sch
)
...
...
@@ -333,16 +130,17 @@ static unsigned int red_drop(struct Qdisc* sch)
struct
sk_buff
*
skb
;
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
skb
=
__skb_dequeue_tail
(
&
sch
->
q
);
skb
=
qdisc_dequeue_tail
(
sch
);
if
(
skb
)
{
unsigned
int
len
=
skb
->
len
;
sch
->
qstats
.
backlog
-=
len
;
sch
->
qstats
.
drops
++
;
q
->
st
.
other
++
;
kfree_skb
(
skb
);
q
->
stats
.
other
++
;
qdisc_drop
(
skb
,
sch
);
return
len
;
}
PSCHED_GET_TIME
(
q
->
qidlestart
);
if
(
!
red_is_idling
(
&
q
->
parms
))
red_start_of_idle_period
(
&
q
->
parms
);
return
0
;
}
...
...
@@ -350,43 +148,38 @@ static void red_reset(struct Qdisc* sch)
{
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
__skb_queue_purge
(
&
sch
->
q
);
sch
->
qstats
.
backlog
=
0
;
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
);
q
->
qave
=
0
;
q
->
qcount
=
-
1
;
qdisc_reset_queue
(
sch
);
red_restart
(
&
q
->
parms
);
}
static
int
red_change
(
struct
Qdisc
*
sch
,
struct
rtattr
*
opt
)
{
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
struct
rtattr
*
tb
[
TCA_RED_
STAB
];
struct
rtattr
*
tb
[
TCA_RED_
MAX
];
struct
tc_red_qopt
*
ctl
;
if
(
opt
==
NULL
||
rtattr_parse_nested
(
tb
,
TCA_RED_STAB
,
opt
)
||
tb
[
TCA_RED_PARMS
-
1
]
==
0
||
tb
[
TCA_RED_STAB
-
1
]
==
0
||
if
(
opt
==
NULL
||
rtattr_parse_nested
(
tb
,
TCA_RED_MAX
,
opt
))
return
-
EINVAL
;
if
(
tb
[
TCA_RED_PARMS
-
1
]
==
NULL
||
RTA_PAYLOAD
(
tb
[
TCA_RED_PARMS
-
1
])
<
sizeof
(
*
ctl
)
||
RTA_PAYLOAD
(
tb
[
TCA_RED_STAB
-
1
])
<
256
)
tb
[
TCA_RED_STAB
-
1
]
==
NULL
||
RTA_PAYLOAD
(
tb
[
TCA_RED_STAB
-
1
])
<
RED_STAB_SIZE
)
return
-
EINVAL
;
ctl
=
RTA_DATA
(
tb
[
TCA_RED_PARMS
-
1
]);
sch_tree_lock
(
sch
);
q
->
flags
=
ctl
->
flags
;
q
->
Wlog
=
ctl
->
Wlog
;
q
->
Plog
=
ctl
->
Plog
;
q
->
Rmask
=
ctl
->
Plog
<
32
?
((
1
<<
ctl
->
Plog
)
-
1
)
:
~
0UL
;
q
->
Scell_log
=
ctl
->
Scell_log
;
q
->
Scell_max
=
(
255
<<
q
->
Scell_log
);
q
->
qth_min
=
ctl
->
qth_min
<<
ctl
->
Wlog
;
q
->
qth_max
=
ctl
->
qth_max
<<
ctl
->
Wlog
;
q
->
limit
=
ctl
->
limit
;
memcpy
(
q
->
Stab
,
RTA_DATA
(
tb
[
TCA_RED_STAB
-
1
]),
256
);
q
->
qcount
=
-
1
;
red_set_parms
(
&
q
->
parms
,
ctl
->
qth_min
,
ctl
->
qth_max
,
ctl
->
Wlog
,
ctl
->
Plog
,
ctl
->
Scell_log
,
RTA_DATA
(
tb
[
TCA_RED_STAB
-
1
]));
if
(
skb_queue_empty
(
&
sch
->
q
))
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
);
red_end_of_idle_period
(
&
q
->
parms
);
sch_tree_unlock
(
sch
);
return
0
;
}
...
...
@@ -399,39 +192,39 @@ static int red_init(struct Qdisc* sch, struct rtattr *opt)
static
int
red_dump
(
struct
Qdisc
*
sch
,
struct
sk_buff
*
skb
)
{
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
unsigned
char
*
b
=
skb
->
tail
;
struct
rtattr
*
rta
;
struct
tc_red_qopt
opt
;
rta
=
(
struct
rtattr
*
)
b
;
RTA_PUT
(
skb
,
TCA_OPTIONS
,
0
,
NULL
);
opt
.
limit
=
q
->
limit
;
opt
.
qth_min
=
q
->
qth_min
>>
q
->
Wlog
;
opt
.
qth_max
=
q
->
qth_max
>>
q
->
Wlog
;
opt
.
Wlog
=
q
->
Wlog
;
opt
.
Plog
=
q
->
Plog
;
opt
.
Scell_log
=
q
->
Scell_log
;
opt
.
flags
=
q
->
flags
;
struct
rtattr
*
opts
=
NULL
;
struct
tc_red_qopt
opt
=
{
.
limit
=
q
->
limit
,
.
flags
=
q
->
flags
,
.
qth_min
=
q
->
parms
.
qth_min
>>
q
->
parms
.
Wlog
,
.
qth_max
=
q
->
parms
.
qth_max
>>
q
->
parms
.
Wlog
,
.
Wlog
=
q
->
parms
.
Wlog
,
.
Plog
=
q
->
parms
.
Plog
,
.
Scell_log
=
q
->
parms
.
Scell_log
,
};
opts
=
RTA_NEST
(
skb
,
TCA_OPTIONS
);
RTA_PUT
(
skb
,
TCA_RED_PARMS
,
sizeof
(
opt
),
&
opt
);
rta
->
rta_len
=
skb
->
tail
-
b
;
return
skb
->
len
;
return
RTA_NEST_END
(
skb
,
opts
);
rtattr_failure:
skb_trim
(
skb
,
b
-
skb
->
data
);
return
-
1
;
return
RTA_NEST_CANCEL
(
skb
,
opts
);
}
static
int
red_dump_stats
(
struct
Qdisc
*
sch
,
struct
gnet_dump
*
d
)
{
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
return
gnet_stats_copy_app
(
d
,
&
q
->
st
,
sizeof
(
q
->
st
));
struct
tc_red_xstats
st
=
{
.
early
=
q
->
stats
.
prob_drop
+
q
->
stats
.
forced_drop
,
.
pdrop
=
q
->
stats
.
pdrop
,
.
other
=
q
->
stats
.
other
,
.
marked
=
q
->
stats
.
prob_mark
+
q
->
stats
.
forced_mark
,
};
return
gnet_stats_copy_app
(
d
,
&
st
,
sizeof
(
st
));
}
static
struct
Qdisc_ops
red_qdisc_ops
=
{
.
next
=
NULL
,
.
cl_ops
=
NULL
,
.
id
=
"red"
,
.
priv_size
=
sizeof
(
struct
red_sched_data
),
.
enqueue
=
red_enqueue
,
...
...
@@ -450,10 +243,13 @@ static int __init red_module_init(void)
{
return
register_qdisc
(
&
red_qdisc_ops
);
}
static
void
__exit
red_module_exit
(
void
)
static
void
__exit
red_module_exit
(
void
)
{
unregister_qdisc
(
&
red_qdisc_ops
);
}
module_init
(
red_module_init
)
module_exit
(
red_module_exit
)
MODULE_LICENSE
(
"GPL"
);
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录