openeuler / raspberrypi-kernel

Commit 5432ebb5
Authored July 05, 2005 by Linus Torvalds

    Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

Parents: d06e7a56, b2f57102

Showing 34 changed files with 1191 additions and 617 deletions (+1191 -617)
Documentation/networking/fib_trie.txt    +145   -0
drivers/net/shaper.c                      +16  -26
drivers/net/skge.h                         +1   -0
drivers/net/tg3.c                         +65   -4
drivers/net/tg3.h                         +10   -0
include/linux/if_shaper.h                  +1   -1
include/linux/skbuff.h                     +9  -10
include/linux/tc_ematch/tc_em_meta.h       +1   -1
include/linux/tcp.h                        +1   -1
include/net/pkt_sched.h                    +3  -14
include/net/sch_generic.h                 +13   -0
include/net/slhc_vj.h                      +8  -13
include/net/sock.h                         +5   -2
include/net/tcp.h                         +17 -139
net/core/dev.c                             +3   -2
net/core/filter.c                         +32  -72
net/core/skbuff.c                          +0   -2
net/decnet/dn_fib.c                        +2   -1
net/ipv4/af_inet.c                        +11   -0
net/ipv4/fib_trie.c                      +168  -34
net/ipv4/ip_output.c                       +0  -16
net/ipv4/route.c                          +75  -51
net/ipv4/tcp.c                            +24  -20
net/ipv4/tcp_input.c                      +37  -39
net/ipv4/tcp_ipv4.c                        +1   -1
net/ipv4/tcp_output.c                    +435 -109
net/ipv6/af_inet6.c                        +2   -2
net/ipv6/ip6_output.c                      +0   -1
net/ipv6/tcp_ipv6.c                        +1   -1
net/sched/Makefile                         +1   -1
net/sched/em_meta.c                        +0   -6
net/sched/sch_api.c                       +26  -37
net/sched/sch_blackhole.c                 +54   -0
net/sched/sch_generic.c                   +24  -11
Documentation/networking/fib_trie.txt  (new file, 0 → 100644)
LC-trie implementation notes.
Node types
----------
leaf
An end node with data. This has a copy of the relevant key, along
with 'hlist' with routing table entries sorted by prefix length.
See struct leaf and struct leaf_info.
trie node or tnode
An internal node, holding an array of child (leaf or tnode) pointers,
indexed through a subset of the key. See Level Compression.
A few concepts explained
------------------------
Bits (tnode)
The number of bits in the key segment used for indexing into the
child array - the "child index". See Level Compression.
Pos (tnode)
The position (in the key) of the key segment used for indexing into
the child array. See Path Compression.
Path Compression / skipped bits
Any given tnode is linked to from the child array of its parent, using
a segment of the key specified by the parent's "pos" and "bits"
In certain cases, this tnode's own "pos" will not be immediately
adjacent to the parent (pos+bits), but there will be some bits
in the key skipped over because they represent a single path with no
deviations. These "skipped bits" constitute Path Compression.
Note that the search algorithm will simply skip over these bits when
searching, making it necessary to save the keys in the leaves to
verify that they actually do match the key we are searching for.
Level Compression / child arrays
The trie is kept level balanced by moving, under certain conditions, the
children of a full child (see "full_children") up one level, so that
instead of a pure binary tree, each internal node ("tnode") may
contain an arbitrarily large array of links to several children.
Conversely, a tnode with a mostly empty child array (see empty_children)
may be "halved", having some of its children moved downwards one level,
in order to avoid ever-increasing child arrays.
empty_children
the number of positions in the child array of a given tnode that are
NULL.
full_children
the number of children of a given tnode that aren't path compressed.
(in other words, they aren't NULL or leaves and their "pos" is equal
to this tnode's "pos"+"bits").
(The word "full" here is used more in the sense of "complete" than
as the opposite of "empty", which might be a tad confusing.)
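
(The child index itself is just a slice of the key. A minimal sketch of
the extraction, modeled on the tkey_extract_bits() helper visible in the
fib_trie.c diff below -- the 32 reflects the 32-bit IPv4 key, and the
sketch assumes 0 < bits <= 32 - pos:

	typedef unsigned int t_key;

	/* Take "bits" bits of "key", starting "pos" bits from the most
	 * significant end; the result indexes the tnode's child array.
	 */
	static t_key tkey_extract_bits(t_key key, int pos, int bits)
	{
		if (pos < 32)
			return (key << pos) >> (32 - bits);
		return 0;
	}

This is an illustration of the idea, not the exact kernel code.)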
Comments
---------
We have tried to keep the structure of the code as close to fib_hash as
possible to allow verification and help in reviewing.
fib_find_node()
A good start for understanding this code. This function implements a
straightforward trie lookup.
fib_insert_node()
Inserts a new leaf node in the trie. This is a bit more complicated than
fib_find_node(). Inserting a new node means we might have to run the
level compression algorithm on part of the trie.
trie_leaf_remove()
Looks up a key, deletes it and runs the level compression algorithm.
trie_rebalance()
The key function for the dynamic trie. After any change in the trie
it is run to optimize and reorganize; it will walk the trie upwards
towards the root from a given tnode, doing a resize() at each step
to implement level compression.
resize()
Analyzes a tnode and optimizes the child array size by either inflating
or shrinking it repeatedly until it fulfills the criteria for optimal
level compression. This part follows the original paper pretty closely
and there may be some room for experimentation here.
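
A sketch of the two decision loops, taken from the resize() hunks in the
fib_trie.c diff below; the 50x/100x scaling keeps the percentage
comparison in integer arithmetic:

	while (tn->full_children > 0 &&
	       50 * (tn->full_children + tnode_child_length(tn)
		     - tn->empty_children) >=
	       inflate_threshold * tnode_child_length(tn))
		tn = inflate(t, tn, &err);	/* double the child array */

	while (tn->bits > 1 &&
	       100 * (tnode_child_length(tn) - tn->empty_children) <
	       halve_threshold * tnode_child_length(tn))
		tn = halve(t, tn, &err);	/* and shrink it again */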
inflate()
Doubles the size of the child array within a tnode. Used by resize().
halve()
Halves the size of the child array within a tnode - the inverse of
inflate(). Used by resize().
fn_trie_insert(), fn_trie_delete(), fn_trie_select_default()
The route manipulation functions. Should conform pretty closely to the
corresponding functions in fib_hash.
fn_trie_flush()
This walks the full trie (using nextleaf()) and searches for empty
leaves which have to be removed.
fn_trie_dump()
Dumps the routing table ordered by prefix length. This is somewhat
slower than the corresponding fib_hash function, as we have to walk the
entire trie for each prefix length. In comparison, fib_hash is organized
as one "zone"/hash per prefix length.
Locking
-------
fib_lock is used for an RW-lock in the same way that this is done in fib_hash.
However, the functions are somewhat separated for other possible locking
scenarios. It might conceivably be possible to run trie_rebalance via RCU
to avoid read_lock in the fn_trie_lookup() function.
Main lookup mechanism
---------------------
fn_trie_lookup() is the main lookup function.
The lookup is in its simplest form just like fib_find_node(). We descend the
trie, key segment by key segment, until we find a leaf. check_leaf() does
the fib_semantic_match in the leaf's sorted prefix hlist.
If we find a match, we are done.
If we don't find a match, we enter prefix matching mode. The prefix length,
starting out at the same as the key length, is reduced one step at a time,
and we backtrack upwards through the trie trying to find a longest matching
prefix. The goal is always to reach a leaf and get a positive result from the
fib_semantic_match mechanism.
Inside each tnode, the search for longest matching prefix consists of searching
through the child array, chopping off (zeroing) the least significant "1" of
the child index until we find a match or the child index consists of nothing but
zeros.
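
Zeroing the least significant set bit is a standard bit trick; a minimal
illustration (not the kernel's exact loop):

	/* cindex is the child index within the current tnode */
	while (cindex != 0) {
		/* ... try the subtrie at cindex ... */
		cindex &= cindex - 1;	/* clear the lowest set "1" bit */
	}

Each iteration corresponds to trying a shorter prefix within the same
tnode.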
At this point we backtrack (t->stats.backtrack++) up the trie, continuing to
chop off part of the key in order to find the longest matching prefix.
At this point we will repeatedly descend subtries to look for a match, and there
are some optimizations available that can provide us with "shortcuts" to avoid
descending into dead ends. Look for "HL_OPTIMIZE" sections in the code.
To alleviate any doubts about the correctness of the route selection process,
a new netlink operation has been added. Look for NETLINK_FIB_LOOKUP, which
gives userland access to fib_lookup().
drivers/net/shaper.c

@@ -135,10 +135,8 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct shaper *shaper = dev->priv;
 	struct sk_buff *ptr;

-	if (down_trylock(&shaper->sem))
-		return -1;
+	spin_lock(&shaper->lock);
 	ptr = shaper->sendq.prev;
 	/*

@@ -232,7 +230,7 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			shaper->stats.collisions++;
 	}
 	shaper_kick(shaper);
-	up(&shaper->sem);
+	spin_unlock(&shaper->lock);
 	return 0;
 }

@@ -271,11 +269,9 @@ static void shaper_timer(unsigned long data)
 {
 	struct shaper *shaper = (struct shaper *)data;

-	if (!down_trylock(&shaper->sem)) {
-		shaper_kick(shaper);
-		up(&shaper->sem);
-	} else
-		mod_timer(&shaper->timer, jiffies);
+	spin_lock(&shaper->lock);
+	shaper_kick(shaper);
+	spin_unlock(&shaper->lock);
 }

 /*

@@ -331,21 +327,6 @@ static void shaper_kick(struct shaper *shaper)
 }

-/*
- *	Flush the shaper queues on a closedown
- */
-
-static void shaper_flush(struct shaper *shaper)
-{
-	struct sk_buff *skb;
-
-	down(&shaper->sem);
-	while ((skb = skb_dequeue(&shaper->sendq)) != NULL)
-		dev_kfree_skb(skb);
-	shaper_kick(shaper);
-	up(&shaper->sem);
-}
-
 /*
  *	Bring the interface up. We just disallow this until a
  *	bind.

@@ -375,7 +356,15 @@ static int shaper_open(struct net_device *dev)
 static int shaper_close(struct net_device *dev)
 {
 	struct shaper *shaper = dev->priv;
-	shaper_flush(shaper);
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&shaper->sendq)) != NULL)
+		dev_kfree_skb(skb);
+
+	spin_lock_bh(&shaper->lock);
+	shaper_kick(shaper);
+	spin_unlock_bh(&shaper->lock);
+
 	del_timer_sync(&shaper->timer);
 	return 0;
 }

@@ -576,6 +565,7 @@ static void shaper_init_priv(struct net_device *dev)
 	init_timer(&sh->timer);
 	sh->timer.function = shaper_timer;
 	sh->timer.data = (unsigned long)sh;
+	spin_lock_init(&sh->lock);
 }

 /*
drivers/net/skge.h

@@ -7,6 +7,7 @@
 /* PCI config registers */
 #define PCI_DEV_REG1	0x40
 #define PCI_DEV_REG2	0x44
+#define PCI_REV_DESC	0x4

 #define PCI_STATUS_ERROR_BITS (PCI_STATUS_DETECTED_PARITY | \
 			       PCI_STATUS_SIG_SYSTEM_ERROR | \
drivers/net/tg3.c

@@ -66,8 +66,8 @@
 #define DRV_MODULE_NAME		"tg3"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"3.32"
-#define DRV_MODULE_RELDATE	"June 24, 2005"
+#define DRV_MODULE_VERSION	"3.33"
+#define DRV_MODULE_RELDATE	"July 5, 2005"

 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0

@@ -5117,7 +5117,7 @@ static void tg3_set_bdinfo(struct tg3 *tp, u32 bdinfo_addr,
 }

 static void __tg3_set_rx_mode(struct net_device *);
-static void tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec)
+static void __tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec)
 {
 	tw32(HOSTCC_RXCOL_TICKS, ec->rx_coalesce_usecs);
 	tw32(HOSTCC_TXCOL_TICKS, ec->tx_coalesce_usecs);

@@ -5460,7 +5460,7 @@ static int tg3_reset_hw(struct tg3 *tp)
 		udelay(10);
 	}

-	tg3_set_coalesce(tp, &tp->coal);
+	__tg3_set_coalesce(tp, &tp->coal);

 	/* set status block DMA address */
 	tw32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH,

@@ -7821,6 +7821,60 @@ static int tg3_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 	return 0;
 }

+static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+{
+	struct tg3 *tp = netdev_priv(dev);
+	u32 max_rxcoal_tick_int = 0, max_txcoal_tick_int = 0;
+	u32 max_stat_coal_ticks = 0, min_stat_coal_ticks = 0;
+
+	if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+		max_rxcoal_tick_int = MAX_RXCOAL_TICK_INT;
+		max_txcoal_tick_int = MAX_TXCOAL_TICK_INT;
+		max_stat_coal_ticks = MAX_STAT_COAL_TICKS;
+		min_stat_coal_ticks = MIN_STAT_COAL_TICKS;
+	}
+
+	if ((ec->rx_coalesce_usecs > MAX_RXCOL_TICKS) ||
+	    (ec->tx_coalesce_usecs > MAX_TXCOL_TICKS) ||
+	    (ec->rx_max_coalesced_frames > MAX_RXMAX_FRAMES) ||
+	    (ec->tx_max_coalesced_frames > MAX_TXMAX_FRAMES) ||
+	    (ec->rx_coalesce_usecs_irq > max_rxcoal_tick_int) ||
+	    (ec->tx_coalesce_usecs_irq > max_txcoal_tick_int) ||
+	    (ec->rx_max_coalesced_frames_irq > MAX_RXCOAL_MAXF_INT) ||
+	    (ec->tx_max_coalesced_frames_irq > MAX_TXCOAL_MAXF_INT) ||
+	    (ec->stats_block_coalesce_usecs > max_stat_coal_ticks) ||
+	    (ec->stats_block_coalesce_usecs < min_stat_coal_ticks))
+		return -EINVAL;
+
+	/* No rx interrupts will be generated if both are zero */
+	if ((ec->rx_coalesce_usecs == 0) &&
+	    (ec->rx_max_coalesced_frames == 0))
+		return -EINVAL;
+
+	/* No tx interrupts will be generated if both are zero */
+	if ((ec->tx_coalesce_usecs == 0) &&
+	    (ec->tx_max_coalesced_frames == 0))
+		return -EINVAL;
+
+	/* Only copy relevant parameters, ignore all others. */
+	tp->coal.rx_coalesce_usecs = ec->rx_coalesce_usecs;
+	tp->coal.tx_coalesce_usecs = ec->tx_coalesce_usecs;
+	tp->coal.rx_max_coalesced_frames = ec->rx_max_coalesced_frames;
+	tp->coal.tx_max_coalesced_frames = ec->tx_max_coalesced_frames;
+	tp->coal.rx_coalesce_usecs_irq = ec->rx_coalesce_usecs_irq;
+	tp->coal.tx_coalesce_usecs_irq = ec->tx_coalesce_usecs_irq;
+	tp->coal.rx_max_coalesced_frames_irq = ec->rx_max_coalesced_frames_irq;
+	tp->coal.tx_max_coalesced_frames_irq = ec->tx_max_coalesced_frames_irq;
+	tp->coal.stats_block_coalesce_usecs = ec->stats_block_coalesce_usecs;
+
+	if (netif_running(dev)) {
+		tg3_full_lock(tp, 0);
+		__tg3_set_coalesce(tp, &tp->coal);
+		tg3_full_unlock(tp);
+	}
+	return 0;
+}
+
 static struct ethtool_ops tg3_ethtool_ops = {
 	.get_settings		= tg3_get_settings,
 	.set_settings		= tg3_set_settings,

@@ -7856,6 +7910,7 @@ static struct ethtool_ops tg3_ethtool_ops = {
 	.get_stats_count	= tg3_get_stats_count,
 	.get_ethtool_stats	= tg3_get_ethtool_stats,
 	.get_coalesce		= tg3_get_coalesce,
+	.set_coalesce		= tg3_set_coalesce,
 };

 static void __devinit tg3_get_eeprom_size(struct tg3 *tp)

@@ -9800,6 +9855,12 @@ static void __devinit tg3_init_coal(struct tg3 *tp)
 		ec->tx_coalesce_usecs = LOW_TXCOL_TICKS_CLRTCKS;
 		ec->tx_coalesce_usecs_irq = DEFAULT_TXCOAL_TICK_INT_CLRTCKS;
 	}
+
+	if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS) {
+		ec->rx_coalesce_usecs_irq = 0;
+		ec->tx_coalesce_usecs_irq = 0;
+		ec->stats_block_coalesce_usecs = 0;
+	}
 }

 static int __devinit tg3_init_one(struct pci_dev *pdev,
drivers/net/tg3.h

@@ -879,31 +879,41 @@
 #define LOW_RXCOL_TICKS_CLRTCKS		 0x00000014
 #define DEFAULT_RXCOL_TICKS		 0x00000048
 #define HIGH_RXCOL_TICKS		 0x00000096
+#define MAX_RXCOL_TICKS			 0x000003ff
 #define HOSTCC_TXCOL_TICKS		 0x00003c0c
 #define LOW_TXCOL_TICKS			 0x00000096
 #define LOW_TXCOL_TICKS_CLRTCKS		 0x00000048
 #define DEFAULT_TXCOL_TICKS		 0x0000012c
 #define HIGH_TXCOL_TICKS		 0x00000145
+#define MAX_TXCOL_TICKS			 0x000003ff
 #define HOSTCC_RXMAX_FRAMES		 0x00003c10
 #define LOW_RXMAX_FRAMES		 0x00000005
 #define DEFAULT_RXMAX_FRAMES		 0x00000008
 #define HIGH_RXMAX_FRAMES		 0x00000012
+#define MAX_RXMAX_FRAMES		 0x000000ff
 #define HOSTCC_TXMAX_FRAMES		 0x00003c14
 #define LOW_TXMAX_FRAMES		 0x00000035
 #define DEFAULT_TXMAX_FRAMES		 0x0000004b
 #define HIGH_TXMAX_FRAMES		 0x00000052
+#define MAX_TXMAX_FRAMES		 0x000000ff
 #define HOSTCC_RXCOAL_TICK_INT		 0x00003c18
 #define DEFAULT_RXCOAL_TICK_INT		 0x00000019
 #define DEFAULT_RXCOAL_TICK_INT_CLRTCKS	 0x00000014
+#define MAX_RXCOAL_TICK_INT		 0x000003ff
 #define HOSTCC_TXCOAL_TICK_INT		 0x00003c1c
 #define DEFAULT_TXCOAL_TICK_INT		 0x00000019
 #define DEFAULT_TXCOAL_TICK_INT_CLRTCKS	 0x00000014
+#define MAX_TXCOAL_TICK_INT		 0x000003ff
 #define HOSTCC_RXCOAL_MAXF_INT		 0x00003c20
 #define DEFAULT_RXCOAL_MAXF_INT		 0x00000005
+#define MAX_RXCOAL_MAXF_INT		 0x000000ff
 #define HOSTCC_TXCOAL_MAXF_INT		 0x00003c24
 #define DEFAULT_TXCOAL_MAXF_INT		 0x00000005
+#define MAX_TXCOAL_MAXF_INT		 0x000000ff
 #define HOSTCC_STAT_COAL_TICKS		 0x00003c28
 #define DEFAULT_STAT_COAL_TICKS		 0x000f4240
+#define MAX_STAT_COAL_TICKS		 0xd693d400
+#define MIN_STAT_COAL_TICKS		 0x00000064
 /* 0x3c2c --> 0x3c30 unused */
 #define HOSTCC_STATS_BLK_HOST_ADDR	 0x00003c30 /* 64-bit */
 #define HOSTCC_STATUS_BLK_HOST_ADDR	 0x00003c38 /* 64-bit */
include/linux/if_shaper.h

@@ -23,7 +23,7 @@ struct shaper
 	__u32 shapeclock;
 	unsigned long recovery;	/* Time we can next clock a packet out on
 				   an empty queue */
-	struct semaphore sem;
+	spinlock_t lock;
 	struct net_device_stats stats;
 	struct net_device *dev;
 	int  (*hard_start_xmit) (struct sk_buff *skb,
include/linux/skbuff.h

@@ -183,7 +183,6 @@ struct skb_shared_info {
  *	@priority: Packet queueing priority
  *	@users: User count - see {datagram,tcp}.c
  *	@protocol: Packet protocol from driver
- *	@security: Security level of packet
  *	@truesize: Buffer size
  *	@head: Head of buffer
  *	@data: Data head pointer

@@ -249,18 +248,18 @@ struct sk_buff {
 				data_len,
 				mac_len,
 				csum;
-	unsigned char		local_df,
-				cloned:1,
-				nohdr:1,
-				pkt_type,
-				ip_summed;
 	__u32			priority;
-	unsigned short		protocol,
-				security;
+	__u8			local_df:1,
+				cloned:1,
+				ip_summed:2,
+				nohdr:1;
+				/* 3 bits spare */
+	__u8			pkt_type;
+	__u16			protocol;

 	void			(*destructor)(struct sk_buff *skb);
 #ifdef CONFIG_NETFILTER
 	unsigned long		nfmark;
 	__u32			nfcache;
 	__u32			nfctinfo;
 	struct nf_conntrack	*nfct;

@@ -1211,7 +1210,7 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 {
 	int hlen = skb_headlen(skb);

-	if (offset + len <= hlen)
+	if (hlen - offset >= len)
 		return skb->data + offset;

 	if (skb_copy_bits(skb, offset, buffer, len) < 0)
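
For reference, skb_header_pointer() either returns a pointer straight
into the linear skb data (when all the requested bytes are there) or
copies them into the caller-supplied buffer. A hedged usage sketch, in
the same style the reworked net/core/filter.c below uses ("offset" is an
illustrative parameter, not from this commit):

	u16 _buf, *p;

	/* read a 2-byte field from a possibly-paged skb */
	p = skb_header_pointer(skb, offset, sizeof(_buf), &_buf);
	if (p == NULL)
		return 0;	/* packet too short */
	/* *p is valid either way: it aliases skb->data or points at _buf */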
include/linux/tc_ematch/tc_em_meta.h

@@ -45,7 +45,7 @@ enum
 	TCF_META_ID_REALDEV,
 	TCF_META_ID_PRIORITY,
 	TCF_META_ID_PROTOCOL,
-	TCF_META_ID_SECURITY,
+	TCF_META_ID_SECURITY, /* obsolete */
 	TCF_META_ID_PKTTYPE,
 	TCF_META_ID_PKTLEN,
 	TCF_META_ID_DATALEN,
include/linux/tcp.h

@@ -286,7 +286,7 @@ struct tcp_sock {
 	__u32	max_window;	/* Maximal window ever seen from peer	*/
 	__u32	pmtu_cookie;	/* Last pmtu seen by socket		*/
 	__u32	mss_cache;	/* Cached effective mss, not including SACKS */
-	__u16	mss_cache_std;	/* Like mss_cache, but without TSO	*/
+	__u16	xmit_size_goal;	/* Goal for segmenting output packets	*/
 	__u16	ext_header_len;	/* Network protocol overhead (IP/IPv6 options) */
 	__u8	ca_state;	/* State of fast-retransmit machine	*/
 	__u8	retransmits;	/* Number of unrecovered RTO timeouts.	*/
include/net/pkt_sched.h

@@ -13,13 +13,12 @@ struct qdisc_walker
 extern rwlock_t qdisc_tree_lock;

-#define QDISC_ALIGN		32
-#define QDISC_ALIGN_CONST	(QDISC_ALIGN - 1)
+#define QDISC_ALIGNTO		32
+#define QDISC_ALIGN(len)	(((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1))

 static inline void *qdisc_priv(struct Qdisc *q)
 {
-	return (char *) q + ((sizeof(struct Qdisc) + QDISC_ALIGN_CONST)
-			      & ~QDISC_ALIGN_CONST);
+	return (char *) q + QDISC_ALIGN(sizeof(struct Qdisc));
 }

 /*

@@ -207,8 +206,6 @@ psched_tod_diff(int delta_sec, int bound)
 #endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */

-extern struct Qdisc noop_qdisc;
-extern struct Qdisc_ops noop_qdisc_ops;
 extern struct Qdisc_ops pfifo_qdisc_ops;
 extern struct Qdisc_ops bfifo_qdisc_ops;

@@ -216,14 +213,6 @@ extern int register_qdisc(struct Qdisc_ops *qops);
 extern int unregister_qdisc(struct Qdisc_ops *qops);
 extern struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
 extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
-extern void dev_init_scheduler(struct net_device *dev);
-extern void dev_shutdown(struct net_device *dev);
-extern void dev_activate(struct net_device *dev);
-extern void dev_deactivate(struct net_device *dev);
-extern void qdisc_reset(struct Qdisc *qdisc);
-extern void qdisc_destroy(struct Qdisc *qdisc);
-extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
-		struct Qdisc_ops *ops);
 extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 		struct rtattr *tab);
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
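
The new QDISC_ALIGN(len) macro rounds a length up to the next
QDISC_ALIGNTO (32-byte) boundary, so qdisc_priv() lands on an aligned
address right past struct Qdisc. A quick worked illustration:

	QDISC_ALIGN(1)  == (1 + 31)  & ~31 == 32
	QDISC_ALIGN(32) == (32 + 31) & ~31 == 32
	QDISC_ALIGN(33) == (33 + 31) & ~31 == 64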
include/net/sch_generic.h

@@ -164,6 +164,19 @@ extern void qdisc_unlock_tree(struct net_device *dev);
 #define tcf_tree_lock(tp)	qdisc_lock_tree((tp)->q->dev)
 #define tcf_tree_unlock(tp)	qdisc_unlock_tree((tp)->q->dev)

+extern struct Qdisc noop_qdisc;
+extern struct Qdisc_ops noop_qdisc_ops;
+extern void dev_init_scheduler(struct net_device *dev);
+extern void dev_shutdown(struct net_device *dev);
+extern void dev_activate(struct net_device *dev);
+extern void dev_deactivate(struct net_device *dev);
+extern void qdisc_reset(struct Qdisc *qdisc);
+extern void qdisc_destroy(struct Qdisc *qdisc);
+extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops);
+extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
+				       struct Qdisc_ops *ops);
+
 static inline void
 tcf_destroy(struct tcf_proto *tp)
 {
include/net/slhc_vj.h

@@ -170,19 +170,14 @@ struct slcompress {
 };
 #define NULLSLCOMPR	(struct slcompress *)0

-#define __ARGS(x) x
-
 /* In slhc.c: */
-struct slcompress *slhc_init __ARGS((int rslots, int tslots));
-void slhc_free __ARGS((struct slcompress *comp));
-int slhc_compress __ARGS((struct slcompress *comp, unsigned char *icp,
-			  int isize, unsigned char *ocp, unsigned char **cpp,
-			  int compress_cid));
-int slhc_uncompress __ARGS((struct slcompress *comp, unsigned char *icp,
-			    int isize));
-int slhc_remember __ARGS((struct slcompress *comp, unsigned char *icp,
-			  int isize));
-int slhc_toss __ARGS((struct slcompress *comp));
+struct slcompress *slhc_init(int rslots, int tslots);
+void slhc_free(struct slcompress *comp);
+int slhc_compress(struct slcompress *comp, unsigned char *icp, int isize,
+		  unsigned char *ocp, unsigned char **cpp, int compress_cid);
+int slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize);
+int slhc_remember(struct slcompress *comp, unsigned char *icp, int isize);
+int slhc_toss(struct slcompress *comp);

 #endif	/* _SLHC_H */
include/net/sock.h

@@ -1134,13 +1134,16 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk)
 static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
 						   int size, int mem, int gfp)
 {
-	struct sk_buff *skb = alloc_skb(size + sk->sk_prot->max_header, gfp);
+	struct sk_buff *skb;
+	int hdr_len;

+	hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
+	skb = alloc_skb(size + hdr_len, gfp);
 	if (skb) {
 		skb->truesize += mem;
 		if (sk->sk_forward_alloc >= (int)skb->truesize ||
 		    sk_stream_mem_schedule(sk, skb->truesize, 0)) {
-			skb_reserve(skb, sk->sk_prot->max_header);
+			skb_reserve(skb, hdr_len);
 			return skb;
 		}
 		__kfree_skb(skb);
include/net/tcp.h

@@ -721,11 +721,16 @@ static inline int tcp_ack_scheduled(struct tcp_sock *tp)
 	return tp->ack.pending & TCP_ACK_SCHED;
 }

-static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp)
+static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp,
+					     unsigned int pkts)
 {
-	if (tp->ack.quick && --tp->ack.quick == 0) {
-		/* Leaving quickack mode we deflate ATO. */
-		tp->ack.ato = TCP_ATO_MIN;
+	if (tp->ack.quick) {
+		if (pkts >= tp->ack.quick) {
+			tp->ack.quick = 0;
+
+			/* Leaving quickack mode we deflate ATO. */
+			tp->ack.ato = TCP_ATO_MIN;
+		} else
+			tp->ack.quick -= pkts;
 	}
 }

@@ -843,7 +848,9 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 /* tcp_output.c */

-extern int tcp_write_xmit(struct sock *, int nonagle);
+extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
+				      unsigned int cur_mss, int nonagle);
+extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp);
 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
 extern void tcp_xmit_retransmit_queue(struct sock *);
 extern void tcp_simple_retransmit(struct sock *);

@@ -855,10 +862,13 @@ extern int tcp_write_wakeup(struct sock *);
 extern void tcp_send_fin(struct sock *sk);
 extern void tcp_send_active_reset(struct sock *sk, int priority);
 extern int tcp_send_synack(struct sock *);
-extern void tcp_push_one(struct sock *, unsigned mss_now);
+extern void tcp_push_one(struct sock *, unsigned int mss_now);
 extern void tcp_send_ack(struct sock *sk);
 extern void tcp_send_delayed_ack(struct sock *sk);

+/* tcp_input.c */
+extern void tcp_cwnd_application_limited(struct sock *sk);
+
 /* tcp_timer.c */
 extern void tcp_init_xmit_timers(struct sock *);
 extern void tcp_clear_xmit_timers(struct sock *);

@@ -958,7 +968,7 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long
 static inline void tcp_initialize_rcv_mss(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int hint = min(tp->advmss, tp->mss_cache_std);
+	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);

 	hint = min(hint, tp->rcv_wnd / 2);
 	hint = min(hint, TCP_MIN_RCVMSS);

@@ -1225,28 +1235,6 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
 	tp->left_out = tp->sacked_out + tp->lost_out;
 }

-extern void tcp_cwnd_application_limited(struct sock *sk);
-
-/* Congestion window validation. (RFC2861) */
-
-static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
-{
-	__u32 packets_out = tp->packets_out;
-
-	if (packets_out >= tp->snd_cwnd) {
-		/* Network is feed fully. */
-		tp->snd_cwnd_used = 0;
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-	} else {
-		/* Network starves. */
-		if (tp->packets_out > tp->snd_cwnd_used)
-			tp->snd_cwnd_used = tp->packets_out;
-
-		if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
-			tcp_cwnd_application_limited(sk);
-	}
-}
-
 /* Set slow start threshould and cwnd not falling to slow start */
 static inline void __tcp_enter_cwr(struct tcp_sock *tp)
 {

@@ -1279,12 +1267,6 @@ static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
 	return 3;
 }

-static __inline__ int tcp_minshall_check(const struct tcp_sock *tp)
-{
-	return after(tp->snd_sml, tp->snd_una) &&
-		!after(tp->snd_sml, tp->snd_nxt);
-}
-
 static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
 					   const struct sk_buff *skb)
 {

@@ -1292,122 +1274,18 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
 		tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
 }

-/* Return 0, if packet can be sent now without violation Nagle's rules:
-   1. It is full sized.
-   2. Or it contains FIN.
-   3. Or TCP_NODELAY was set.
-   4. Or TCP_CORK is not set, and all sent packets are ACKed.
-      With Minshall's modification: all sent small packets are ACKed.
- */
-
-static __inline__ int
-tcp_nagle_check(const struct tcp_sock *tp, const struct sk_buff *skb,
-		unsigned mss_now, int nonagle)
-{
-	return (skb->len < mss_now &&
-		!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
-		((nonagle & TCP_NAGLE_CORK) ||
-		 (!nonagle &&
-		  tp->packets_out &&
-		  tcp_minshall_check(tp))));
-}
-
-extern void tcp_set_skb_tso_segs(struct sock *, struct sk_buff *);
-
-/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
- * should be put on the wire right now.
- */
-static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
-				   unsigned cur_mss, int nonagle)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int pkts = tcp_skb_pcount(skb);
-
-	if (!pkts) {
-		tcp_set_skb_tso_segs(sk, skb);
-		pkts = tcp_skb_pcount(skb);
-	}
-
-	/*	RFC 1122 - section 4.2.3.4
-	 *
-	 *	We must queue if
-	 *
-	 *	a) The right edge of this frame exceeds the window
-	 *	b) There are packets in flight and we have a small segment
-	 *	   [SWS avoidance and Nagle algorithm]
-	 *	   (part of SWS is done on packetization)
-	 *	   Minshall version sounds: there are no _small_
-	 *	   segments in flight. (tcp_nagle_check)
-	 *	c) We have too many packets 'in flight'
-	 *
-	 *	Don't use the nagle rule for urgent data (or
-	 *	for the final FIN -DaveM).
-	 *
-	 *	Also, Nagle rule does not apply to frames, which
-	 *	sit in the middle of queue (they have no chances
-	 *	to get new data) and if room at tail of skb is
-	 *	not enough to save something seriously (<32 for now).
-	 */
-
-	/* Don't be strict about the congestion window for the
-	 * final FIN frame.  -DaveM
-	 */
-	return (((nonagle & TCP_NAGLE_PUSH) || tp->urg_mode ||
-		 !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
-		(((tcp_packets_in_flight(tp) + (pkts - 1)) < tp->snd_cwnd) ||
-		 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
-		!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
-}
-
 static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
 {
 	if (!tp->packets_out && !tp->pending)
 		tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
 }

-static __inline__ int tcp_skb_is_last(const struct sock *sk,
-				      const struct sk_buff *skb)
-{
-	return skb->next == (struct sk_buff *)&sk->sk_write_queue;
-}
-
-/* Push out any pending frames which were held back due to
- * TCP_CORK or attempt at coalescing tiny packets.
- * The socket must be locked by the caller.
- */
-static __inline__ void __tcp_push_pending_frames(struct sock *sk,
-						 struct tcp_sock *tp,
-						 unsigned cur_mss,
-						 int nonagle)
-{
-	struct sk_buff *skb = sk->sk_send_head;
-
-	if (skb) {
-		if (!tcp_skb_is_last(sk, skb))
-			nonagle = TCP_NAGLE_PUSH;
-		if (!tcp_snd_test(sk, skb, cur_mss, nonagle) ||
-		    tcp_write_xmit(sk, nonagle))
-			tcp_check_probe_timer(sk, tp);
-	}
-	tcp_cwnd_validate(sk, tp);
-}
-
 static __inline__ void tcp_push_pending_frames(struct sock *sk,
 					       struct tcp_sock *tp)
 {
 	__tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
 }

-static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
-{
-	struct sk_buff *skb = sk->sk_send_head;
-
-	return (skb &&
-		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
-			     tcp_skb_is_last(sk, skb) ? TCP_NAGLE_PUSH : tp->nonagle));
-}
-
 static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
 {
 	tp->snd_wl1 = seq;
net/core/dev.c

@@ -2089,10 +2089,11 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
 {
 	unsigned short old_flags = dev->flags;

-	dev->flags |= IFF_PROMISC;
 	if ((dev->promiscuity += inc) == 0)
 		dev->flags &= ~IFF_PROMISC;
-	if (dev->flags ^ old_flags) {
+	else
+		dev->flags |= IFF_PROMISC;
+	if (dev->flags != old_flags) {
 		dev_mc_upload(dev);
 		printk(KERN_INFO "device %s %s promiscuous mode\n",
 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
net/core/filter.c

@@ -36,7 +36,7 @@
 #include <linux/filter.h>

 /* No hurry in this branch */
-static u8 *load_pointer(struct sk_buff *skb, int k)
+static void *__load_pointer(struct sk_buff *skb, int k)
 {
 	u8 *ptr = NULL;

@@ -50,6 +50,18 @@ static u8 *load_pointer(struct sk_buff *skb, int k)
 	return NULL;
 }

+static inline void *load_pointer(struct sk_buff *skb, int k,
+				 unsigned int size, void *buffer)
+{
+	if (k >= 0)
+		return skb_header_pointer(skb, k, size, buffer);
+	else {
+		if (k >= SKF_AD_OFF)
+			return NULL;
+		return __load_pointer(skb, k);
+	}
+}
+
 /**
  *	sk_run_filter - run a filter on a socket
  *	@skb: buffer to run the filter on

@@ -64,15 +76,12 @@ static u8 *load_pointer(struct sk_buff *skb, int k)
 int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 {
-	unsigned char *data = skb->data;
-	/* len is UNSIGNED. Byte wide insns relies only on implicit
-	   type casts to prevent reading arbitrary memory locations.
-	 */
-	unsigned int len = skb->len - skb->data_len;
 	struct sock_filter *fentry;	/* We walk down these */
+	void *ptr;
 	u32 A = 0;			/* Accumulator */
 	u32 X = 0;			/* Index Register */
 	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
+	u32 tmp;
 	int k;
 	int pc;

@@ -168,86 +177,35 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 		case BPF_LD|BPF_W|BPF_ABS:
 			k = fentry->k;
 load_w:
-			if (k >= 0 && (unsigned int)(k + sizeof(u32)) <= len) {
-				A = ntohl(*(u32 *)&data[k]);
+			ptr = load_pointer(skb, k, 4, &tmp);
+			if (ptr != NULL) {
+				A = ntohl(*(u32 *)ptr);
 				continue;
 			}
-			if (k < 0) {
-				u8 *ptr;
-
-				if (k >= SKF_AD_OFF)
-					break;
-				ptr = load_pointer(skb, k);
-				if (ptr) {
-					A = ntohl(*(u32 *)ptr);
-					continue;
-				}
-			} else {
-				u32 _tmp, *p;
-				p = skb_header_pointer(skb, k, 4, &_tmp);
-				if (p != NULL) {
-					A = ntohl(*p);
-					continue;
-				}
-			}
 			return 0;
 		case BPF_LD|BPF_H|BPF_ABS:
 			k = fentry->k;
 load_h:
-			if (k >= 0 && (unsigned int)(k + sizeof(u16)) <= len) {
-				A = ntohs(*(u16 *)&data[k]);
+			ptr = load_pointer(skb, k, 2, &tmp);
+			if (ptr != NULL) {
+				A = ntohs(*(u16 *)ptr);
 				continue;
 			}
-			if (k < 0) {
-				u8 *ptr;
-
-				if (k >= SKF_AD_OFF)
-					break;
-				ptr = load_pointer(skb, k);
-				if (ptr) {
-					A = ntohs(*(u16 *)ptr);
-					continue;
-				}
-			} else {
-				u16 _tmp, *p;
-				p = skb_header_pointer(skb, k, 2, &_tmp);
-				if (p != NULL) {
-					A = ntohs(*p);
-					continue;
-				}
-			}
 			return 0;
 		case BPF_LD|BPF_B|BPF_ABS:
 			k = fentry->k;
 load_b:
-			if (k >= 0 && (unsigned int)k < len) {
-				A = data[k];
+			ptr = load_pointer(skb, k, 1, &tmp);
+			if (ptr != NULL) {
+				A = *(u8 *)ptr;
 				continue;
 			}
-			if (k < 0) {
-				u8 *ptr;
-
-				if (k >= SKF_AD_OFF)
-					break;
-				ptr = load_pointer(skb, k);
-				if (ptr) {
-					A = *ptr;
-					continue;
-				}
-			} else {
-				u8 _tmp, *p;
-				p = skb_header_pointer(skb, k, 1, &_tmp);
-				if (p != NULL) {
-					A = *p;
-					continue;
-				}
-			}
 			return 0;
 		case BPF_LD|BPF_W|BPF_LEN:
-			A = len;
+			A = skb->len;
 			continue;
 		case BPF_LDX|BPF_W|BPF_LEN:
-			X = len;
+			X = skb->len;
 			continue;
 		case BPF_LD|BPF_W|BPF_IND:
 			k = X + fentry->k;

@@ -259,10 +217,12 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 			k = X + fentry->k;
 			goto load_b;
 		case BPF_LDX|BPF_B|BPF_MSH:
-			if (fentry->k >= len)
-				return 0;
-			X = (data[fentry->k] & 0xf) << 2;
-			continue;
+			ptr = load_pointer(skb, fentry->k, 1, &tmp);
+			if (ptr != NULL) {
+				X = (*(u8 *)ptr & 0xf) << 2;
+				continue;
+			}
+			return 0;
 		case BPF_LD|BPF_IMM:
 			A = fentry->k;
 			continue;
net/core/skbuff.c

@@ -357,7 +357,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
 	C(ip_summed);
 	C(priority);
 	C(protocol);
-	C(security);
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	C(nfmark);

@@ -422,7 +421,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->pkt_type	= old->pkt_type;
 	new->stamp	= old->stamp;
 	new->destructor = NULL;
-	new->security	= old->security;
 #ifdef CONFIG_NETFILTER
 	new->nfmark	= old->nfmark;
 	new->nfcache	= old->nfcache;
net/decnet/dn_fib.c

@@ -551,7 +551,8 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		if (t < s_t)
 			continue;
 		if (t > s_t)
-			memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(int));
+			memset(&cb->args[1], 0,
+			       sizeof(cb->args) - sizeof(cb->args[0]));
 		tb = dn_fib_get_table(t, 0);
 		if (tb == NULL)
 			continue;
net/ipv4/af_inet.c

@@ -1009,6 +1009,15 @@ static int __init init_ipv4_mibs(void)
 static int ipv4_proc_init(void);
 extern void ipfrag_init(void);

+/*
+ *	IP protocol layer initialiser
+ */
+
+static struct packet_type ip_packet_type = {
+	.type = __constant_htons(ETH_P_IP),
+	.func = ip_rcv,
+};
+
 static int __init inet_init(void)
 {
 	struct sk_buff *dummy_skb;

@@ -1102,6 +1111,8 @@ static int __init inet_init(void)
 	ipfrag_init();

+	dev_add_pack(&ip_packet_type);
+
 	rc = 0;
 out:
 	return rc;
net/ipv4/fib_trie.c
浏览文件 @
5432ebb5
...
@@ -43,7 +43,7 @@
...
@@ -43,7 +43,7 @@
* 2 of the License, or (at your option) any later version.
* 2 of the License, or (at your option) any later version.
*/
*/
#define VERSION "0.32
4
"
#define VERSION "0.32
5
"
#include <linux/config.h>
#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/uaccess.h>
...
@@ -136,6 +136,7 @@ struct trie_use_stats {
...
@@ -136,6 +136,7 @@ struct trie_use_stats {
unsigned
int
semantic_match_passed
;
unsigned
int
semantic_match_passed
;
unsigned
int
semantic_match_miss
;
unsigned
int
semantic_match_miss
;
unsigned
int
null_node_hit
;
unsigned
int
null_node_hit
;
unsigned
int
resize_node_skipped
;
};
};
#endif
#endif
...
@@ -164,8 +165,8 @@ static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
...
@@ -164,8 +165,8 @@ static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
static
void
tnode_put_child_reorg
(
struct
tnode
*
tn
,
int
i
,
struct
node
*
n
,
int
wasfull
);
static
void
tnode_put_child_reorg
(
struct
tnode
*
tn
,
int
i
,
struct
node
*
n
,
int
wasfull
);
static
int
tnode_child_length
(
struct
tnode
*
tn
);
static
int
tnode_child_length
(
struct
tnode
*
tn
);
static
struct
node
*
resize
(
struct
trie
*
t
,
struct
tnode
*
tn
);
static
struct
node
*
resize
(
struct
trie
*
t
,
struct
tnode
*
tn
);
static
struct
tnode
*
inflate
(
struct
trie
*
t
,
struct
tnode
*
tn
);
static
struct
tnode
*
inflate
(
struct
trie
*
t
,
struct
tnode
*
tn
,
int
*
err
);
static
struct
tnode
*
halve
(
struct
trie
*
t
,
struct
tnode
*
tn
);
static
struct
tnode
*
halve
(
struct
trie
*
t
,
struct
tnode
*
tn
,
int
*
err
);
static
void
tnode_free
(
struct
tnode
*
tn
);
static
void
tnode_free
(
struct
tnode
*
tn
);
static
void
trie_dump_seq
(
struct
seq_file
*
seq
,
struct
trie
*
t
);
static
void
trie_dump_seq
(
struct
seq_file
*
seq
,
struct
trie
*
t
);
extern
struct
fib_alias
*
fib_find_alias
(
struct
list_head
*
fah
,
u8
tos
,
u32
prio
);
extern
struct
fib_alias
*
fib_find_alias
(
struct
list_head
*
fah
,
u8
tos
,
u32
prio
);
...
@@ -358,11 +359,32 @@ static inline void free_leaf_info(struct leaf_info *li)
...
@@ -358,11 +359,32 @@ static inline void free_leaf_info(struct leaf_info *li)
kfree
(
li
);
kfree
(
li
);
}
}
static
struct
tnode
*
tnode_alloc
(
unsigned
int
size
)
{
if
(
size
<=
PAGE_SIZE
)
{
return
kmalloc
(
size
,
GFP_KERNEL
);
}
else
{
return
(
struct
tnode
*
)
__get_free_pages
(
GFP_KERNEL
,
get_order
(
size
));
}
}
static
void
__tnode_free
(
struct
tnode
*
tn
)
{
unsigned
int
size
=
sizeof
(
struct
tnode
)
+
(
1
<<
tn
->
bits
)
*
sizeof
(
struct
node
*
);
if
(
size
<=
PAGE_SIZE
)
kfree
(
tn
);
else
free_pages
((
unsigned
long
)
tn
,
get_order
(
size
));
}
static
struct
tnode
*
tnode_new
(
t_key
key
,
int
pos
,
int
bits
)
static
struct
tnode
*
tnode_new
(
t_key
key
,
int
pos
,
int
bits
)
{
{
int
nchildren
=
1
<<
bits
;
int
nchildren
=
1
<<
bits
;
int
sz
=
sizeof
(
struct
tnode
)
+
nchildren
*
sizeof
(
struct
node
*
);
int
sz
=
sizeof
(
struct
tnode
)
+
nchildren
*
sizeof
(
struct
node
*
);
struct
tnode
*
tn
=
kmalloc
(
sz
,
GFP_KERNEL
);
struct
tnode
*
tn
=
tnode_alloc
(
sz
);
if
(
tn
)
{
if
(
tn
)
{
memset
(
tn
,
0
,
sz
);
memset
(
tn
,
0
,
sz
);
...
@@ -390,7 +412,7 @@ static void tnode_free(struct tnode *tn)
...
@@ -390,7 +412,7 @@ static void tnode_free(struct tnode *tn)
printk
(
"FL %p
\n
"
,
tn
);
printk
(
"FL %p
\n
"
,
tn
);
}
}
else
if
(
IS_TNODE
(
tn
))
{
else
if
(
IS_TNODE
(
tn
))
{
k
free
(
tn
);
__tnode_
free
(
tn
);
if
(
trie_debug
>
0
)
if
(
trie_debug
>
0
)
printk
(
"FT %p
\n
"
,
tn
);
printk
(
"FT %p
\n
"
,
tn
);
}
}
...
@@ -460,6 +482,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w
...
@@ -460,6 +482,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w
static
struct
node
*
resize
(
struct
trie
*
t
,
struct
tnode
*
tn
)
static
struct
node
*
resize
(
struct
trie
*
t
,
struct
tnode
*
tn
)
{
{
int
i
;
int
i
;
int
err
=
0
;
if
(
!
tn
)
if
(
!
tn
)
return
NULL
;
return
NULL
;
...
@@ -556,12 +579,20 @@ static struct node *resize(struct trie *t, struct tnode *tn)
...
@@ -556,12 +579,20 @@ static struct node *resize(struct trie *t, struct tnode *tn)
*/
*/
check_tnode
(
tn
);
check_tnode
(
tn
);
err
=
0
;
while
((
tn
->
full_children
>
0
&&
while
((
tn
->
full_children
>
0
&&
50
*
(
tn
->
full_children
+
tnode_child_length
(
tn
)
-
tn
->
empty_children
)
>=
50
*
(
tn
->
full_children
+
tnode_child_length
(
tn
)
-
tn
->
empty_children
)
>=
inflate_threshold
*
tnode_child_length
(
tn
)))
{
inflate_threshold
*
tnode_child_length
(
tn
)))
{
tn
=
inflate
(
t
,
tn
);
tn
=
inflate
(
t
,
tn
,
&
err
);
if
(
err
)
{
#ifdef CONFIG_IP_FIB_TRIE_STATS
t
->
stats
.
resize_node_skipped
++
;
#endif
break
;
}
}
}
check_tnode
(
tn
);
check_tnode
(
tn
);
...
@@ -570,11 +601,22 @@ static struct node *resize(struct trie *t, struct tnode *tn)
...
@@ -570,11 +601,22 @@ static struct node *resize(struct trie *t, struct tnode *tn)
* Halve as long as the number of empty children in this
* Halve as long as the number of empty children in this
* node is above threshold.
* node is above threshold.
*/
*/
err
=
0
;
while
(
tn
->
bits
>
1
&&
while
(
tn
->
bits
>
1
&&
100
*
(
tnode_child_length
(
tn
)
-
tn
->
empty_children
)
<
100
*
(
tnode_child_length
(
tn
)
-
tn
->
empty_children
)
<
halve_threshold
*
tnode_child_length
(
tn
))
halve_threshold
*
tnode_child_length
(
tn
))
{
tn
=
halve
(
t
,
tn
,
&
err
);
if
(
err
)
{
#ifdef CONFIG_IP_FIB_TRIE_STATS
t
->
stats
.
resize_node_skipped
++
;
#endif
break
;
}
}
tn
=
halve
(
t
,
tn
);
/* Only one child remains */
/* Only one child remains */
...
@@ -599,7 +641,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
...
@@ -599,7 +641,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
return
(
struct
node
*
)
tn
;
return
(
struct
node
*
)
tn
;
}
}
static
struct
tnode
*
inflate
(
struct
trie
*
t
,
struct
tnode
*
tn
)
static
struct
tnode
*
inflate
(
struct
trie
*
t
,
struct
tnode
*
tn
,
int
*
err
)
{
{
struct
tnode
*
inode
;
struct
tnode
*
inode
;
struct
tnode
*
oldtnode
=
tn
;
struct
tnode
*
oldtnode
=
tn
;
...
@@ -611,8 +653,63 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
...
@@ -611,8 +653,63 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
tn
=
tnode_new
(
oldtnode
->
key
,
oldtnode
->
pos
,
oldtnode
->
bits
+
1
);
tn
=
tnode_new
(
oldtnode
->
key
,
oldtnode
->
pos
,
oldtnode
->
bits
+
1
);
if
(
!
tn
)
if
(
!
tn
)
{
trie_bug
(
"tnode_new failed"
);
*
err
=
-
ENOMEM
;
return
oldtnode
;
}
/*
* Preallocate and store tnodes before the actual work so we
* don't get into an inconsistent state if memory allocation
* fails. In case of failure we return the oldnode and inflate
* of tnode is ignored.
*/
for
(
i
=
0
;
i
<
olen
;
i
++
)
{
struct
tnode
*
inode
=
(
struct
tnode
*
)
tnode_get_child
(
oldtnode
,
i
);
if
(
inode
&&
IS_TNODE
(
inode
)
&&
inode
->
pos
==
oldtnode
->
pos
+
oldtnode
->
bits
&&
inode
->
bits
>
1
)
{
struct
tnode
*
left
,
*
right
;
t_key
m
=
TKEY_GET_MASK
(
inode
->
pos
,
1
);
left
=
tnode_new
(
inode
->
key
&
(
~
m
),
inode
->
pos
+
1
,
inode
->
bits
-
1
);
if
(
!
left
)
{
*
err
=
-
ENOMEM
;
break
;
}
right
=
tnode_new
(
inode
->
key
|
m
,
inode
->
pos
+
1
,
inode
->
bits
-
1
);
if
(
!
right
)
{
*
err
=
-
ENOMEM
;
break
;
}
put_child
(
t
,
tn
,
2
*
i
,
(
struct
node
*
)
left
);
put_child
(
t
,
tn
,
2
*
i
+
1
,
(
struct
node
*
)
right
);
}
}
if
(
*
err
)
{
int
size
=
tnode_child_length
(
tn
);
int
j
;
for
(
j
=
0
;
j
<
size
;
j
++
)
if
(
tn
->
child
[
j
])
tnode_free
((
struct
tnode
*
)
tn
->
child
[
j
]);
tnode_free
(
tn
);
*
err
=
-
ENOMEM
;
return
oldtnode
;
}
for
(
i
=
0
;
i
<
olen
;
i
++
)
{
for
(
i
=
0
;
i
<
olen
;
i
++
)
{
struct
node
*
node
=
tnode_get_child
(
oldtnode
,
i
);
struct
node
*
node
=
tnode_get_child
(
oldtnode
,
i
);
...
@@ -625,7 +722,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
...
@@ -625,7 +722,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
if
(
IS_LEAF
(
node
)
||
((
struct
tnode
*
)
node
)
->
pos
>
if
(
IS_LEAF
(
node
)
||
((
struct
tnode
*
)
node
)
->
pos
>
tn
->
pos
+
tn
->
bits
-
1
)
{
tn
->
pos
+
tn
->
bits
-
1
)
{
if
(
tkey_extract_bits
(
node
->
key
,
tn
->
pos
+
tn
->
bits
-
1
,
if
(
tkey_extract_bits
(
node
->
key
,
oldtnode
->
pos
+
oldtnode
->
bits
,
1
)
==
0
)
1
)
==
0
)
put_child
(
t
,
tn
,
2
*
i
,
node
);
put_child
(
t
,
tn
,
2
*
i
,
node
);
else
else
...
@@ -665,27 +762,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
 			 * the position (inode->pos)
 			 */
-			t_key m = TKEY_GET_MASK(inode->pos, 1);

 			/* Use the old key, but set the new significant
 			 * bit to zero.
 			 */
-			left = tnode_new(inode->key&(~m), inode->pos + 1,
-					 inode->bits - 1);
-
-			if (!left)
-				trie_bug("tnode_new failed");
+			left = (struct tnode *) tnode_get_child(tn, 2*i);
+			put_child(t, tn, 2*i, NULL);
+
+			if (!left)
+				BUG();

-			/* Use the old key, but set the new significant
-			 * bit to one.
-			 */
-			right = tnode_new(inode->key|m, inode->pos + 1,
-					  inode->bits - 1);
-
-			if (!right)
-				trie_bug("tnode_new failed");
+			right = (struct tnode *) tnode_get_child(tn, 2*i+1);
+			put_child(t, tn, 2*i+1, NULL);
+
+			if (!right)
+				BUG();

 			size = tnode_child_length(left);
 			for (j = 0; j < size; j++) {
 				put_child(t, left, j, inode->child[j]);
...
@@ -701,7 +793,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
 	return tn;
 }

-static struct tnode *halve(struct trie *t, struct tnode *tn)
+static struct tnode *halve(struct trie *t, struct tnode *tn, int *err)
 {
 	struct tnode *oldtnode = tn;
 	struct node *left, *right;
...
@@ -712,8 +804,48 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
 	tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);

-	if (!tn)
-		trie_bug("tnode_new failed");
+	if (!tn) {
+		*err = -ENOMEM;
+		return oldtnode;
+	}
+
+	/*
+	 * Preallocate and store tnodes before the actual work so we
+	 * don't get into an inconsistent state if memory allocation
+	 * fails. In case of failure we return the oldnode and halve
+	 * of tnode is ignored.
+	 */
+
+	for (i = 0; i < olen; i += 2) {
+		left = tnode_get_child(oldtnode, i);
+		right = tnode_get_child(oldtnode, i+1);
+
+		/* Two nonempty children */
+		if (left && right) {
+			struct tnode *newBinNode =
+				tnode_new(left->key, tn->pos + tn->bits, 1);
+
+			if (!newBinNode) {
+				*err = -ENOMEM;
+				break;
+			}
+			put_child(t, tn, i/2, (struct node *)newBinNode);
+		}
+	}
+
+	if (*err) {
+		int size = tnode_child_length(tn);
+		int j;
+
+		for (j = 0; j < size; j++)
+			if (tn->child[j])
+				tnode_free((struct tnode *)tn->child[j]);
+
+		tnode_free(tn);
+
+		*err = -ENOMEM;
+		return oldtnode;
+	}

 	for (i = 0; i < olen; i += 2) {
 		left = tnode_get_child(oldtnode, i);
...
@@ -730,10 +862,11 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
 		/* Two nonempty children */
 		else {
-			struct tnode *newBinNode =
-				tnode_new(left->key, tn->pos + tn->bits, 1);
+			struct tnode *newBinNode =
+				(struct tnode *) tnode_get_child(tn, i/2);
+			put_child(t, tn, i/2, NULL);

 			if (!newBinNode)
-				trie_bug("tnode_new failed");
+				BUG();

 			put_child(t, newBinNode, 0, left);
 			put_child(t, newBinNode, 1, right);
...
@@ -2301,6 +2434,7 @@ static void collect_and_show(struct trie *t, struct seq_file *seq)
 	seq_printf(seq, "semantic match passed = %d\n", t->stats.semantic_match_passed);
 	seq_printf(seq, "semantic match miss = %d\n", t->stats.semantic_match_miss);
 	seq_printf(seq, "null node hit= %d\n", t->stats.null_node_hit);
+	seq_printf(seq, "skipped node resize = %d\n", t->stats.resize_node_skipped);
 #ifdef CLEAR_STATS
 	memset(&(t->stats), 0, sizeof(t->stats));
 #endif
...
net/ipv4/ip_output.c
...
@@ -389,7 +389,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->pkt_type = from->pkt_type;
 	to->priority = from->priority;
 	to->protocol = from->protocol;
-	to->security = from->security;
 	dst_release(to->dst);
 	to->dst = dst_clone(from->dst);
 	to->dev = from->dev;
...
@@ -1329,23 +1328,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 	ip_rt_put(rt);
 }

-/*
- *	IP protocol layer initialiser
- */
-
-static struct packet_type ip_packet_type = {
-	.type = __constant_htons(ETH_P_IP),
-	.func = ip_rcv,
-};
-
-/*
- *	IP registers the packet type and then calls the subprotocol initialisers
- */
-
 void __init ip_init(void)
 {
-	dev_add_pack(&ip_packet_type);
-
 	ip_rt_init();
 	inet_initpeers();
...
net/ipv4/route.c
...
@@ -54,6 +54,7 @@
 *	Marc Boucher	:	routing by fwmark
 *	Robert Olsson	:	Added rt_cache statistics
 *	Arnaldo C. Melo	:	Convert proc stuff to seq_file
+*	Eric Dumazet	:	hashed spinlocks and rt_check_expire() fixes.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
...
@@ -70,6 +71,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/bootmem.h>
 #include <linux/string.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
#include <linux/sockios.h>
...
@@ -201,8 +203,37 @@ __u8 ip_tos2prio[16] = {
...
@@ -201,8 +203,37 @@ __u8 ip_tos2prio[16] = {
struct
rt_hash_bucket
{
struct
rt_hash_bucket
{
struct
rtable
*
chain
;
struct
rtable
*
chain
;
spinlock_t
lock
;
};
}
__attribute__
((
__aligned__
(
8
)));
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
/*
* Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
* The size of this table is a power of two and depends on the number of CPUS.
*/
#if NR_CPUS >= 32
#define RT_HASH_LOCK_SZ 4096
#elif NR_CPUS >= 16
#define RT_HASH_LOCK_SZ 2048
#elif NR_CPUS >= 8
#define RT_HASH_LOCK_SZ 1024
#elif NR_CPUS >= 4
#define RT_HASH_LOCK_SZ 512
#else
#define RT_HASH_LOCK_SZ 256
#endif
static
spinlock_t
*
rt_hash_locks
;
# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
# define rt_hash_lock_init() { \
int i; \
rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \
if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \
for (i = 0; i < RT_HASH_LOCK_SZ; i++) \
spin_lock_init(&rt_hash_locks[i]); \
}
#else
# define rt_hash_lock_addr(slot) NULL
# define rt_hash_lock_init()
#endif
static
struct
rt_hash_bucket
*
rt_hash_table
;
static
struct
rt_hash_bucket
*
rt_hash_table
;
static
unsigned
rt_hash_mask
;
static
unsigned
rt_hash_mask
;
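The change above replaces a per-bucket spinlock with a striped lock table: many hash buckets share one lock, chosen by masking the bucket index, so lock memory is bounded by the CPU count rather than by the table size, and the struct shrinks to a single pointer per bucket. A hedged userspace sketch of the same striping idea using POSIX mutexes (the size and names are illustrative):

#include <pthread.h>

#define NR_LOCKS 256			/* power of two, sized to CPU count */

static pthread_mutex_t locks[NR_LOCKS];

static void locks_init(void)
{
	for (int i = 0; i < NR_LOCKS; i++)
		pthread_mutex_init(&locks[i], NULL);
}

/* Many buckets share one lock; the stripe is picked by masking the
 * bucket index, so lock memory stays small however big the table is. */
static pthread_mutex_t *lock_for_bucket(unsigned int slot)
{
	return &locks[slot & (NR_LOCKS - 1)];
}

static void bucket_update(unsigned int slot)
{
	pthread_mutex_t *l = lock_for_bucket(slot);

	pthread_mutex_lock(l);
	/* ... walk or modify the chain of bucket 'slot' ... */
	pthread_mutex_unlock(l);
}

The trade-off is that two unrelated buckets may contend on the same stripe, but with a few hundred stripes that collision probability is small while the memory saving over one lock per bucket is large.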
...
@@ -575,19 +606,26 @@ static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
 /* This runs via a timer and thus is always in BH context. */
 static void rt_check_expire(unsigned long dummy)
 {
-	static int rover;
-	int i = rover, t;
+	static unsigned int rover;
+	unsigned int i = rover, goal;
 	struct rtable *rth, **rthp;
 	unsigned long now = jiffies;
+	u64 mult;

-	for (t = ip_rt_gc_interval << rt_hash_log; t >= 0;
-	     t -= ip_rt_gc_timeout) {
+	mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
+	if (ip_rt_gc_timeout > 1)
+		do_div(mult, ip_rt_gc_timeout);
+	goal = (unsigned int)mult;
+	if (goal > rt_hash_mask) goal = rt_hash_mask + 1;
+	for (; goal > 0; goal--) {
 		unsigned long tmo = ip_rt_gc_timeout;

 		i = (i + 1) & rt_hash_mask;
 		rthp = &rt_hash_table[i].chain;

-		spin_lock(&rt_hash_table[i].lock);
+		if (*rthp == 0)
+			continue;
+		spin_lock(rt_hash_lock_addr(i));
 		while ((rth = *rthp) != NULL) {
 			if (rth->u.dst.expires) {
 				/* Entry is expired even if it is in use */
...
@@ -620,14 +658,14 @@ static void rt_check_expire(unsigned long dummy)
 				rt_free(rth);
 #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
 		}
-		spin_unlock(&rt_hash_table[i].lock);
+		spin_unlock(rt_hash_lock_addr(i));

 		/* Fallback loop breaker. */
 		if (time_after(jiffies, now))
 			break;
 	}
 	rover = i;
-	mod_timer(&rt_periodic_timer, now + ip_rt_gc_interval);
+	mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
 }

 /* This can run from both BH and non-BH contexts, the latter
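The rewritten rt_check_expire spreads expiry work evenly over time: each timer tick scans goal = (gc_interval << hash_log) / gc_timeout buckets, clamped to the table size, so a full sweep of the cache completes roughly once per gc_timeout no matter how large the table grows. A hedged arithmetic sketch of that budget computation (plain C, illustrative constants):

#include <stdio.h>
#include <stdint.h>

/* Buckets to scan per tick so the whole table is covered once per
 * 'timeout' while the timer fires every 'interval'.  Mirrors:
 * goal = (interval << hash_log) / timeout, clamped to the table. */
static unsigned int scan_goal(uint64_t interval, uint64_t timeout,
			      unsigned int hash_log, unsigned int hash_mask)
{
	uint64_t mult = interval << hash_log;	/* interval * nr_buckets */

	if (timeout > 1)
		mult /= timeout;
	if (mult > hash_mask)
		mult = (uint64_t)hash_mask + 1;	/* never exceed table size */
	return (unsigned int)mult;
}

int main(void)
{
	/* e.g. 65536 buckets (log = 16), timer every 60 units, entries
	 * live 300 units: scan 65536 * 60 / 300 = 13107 buckets per tick. */
	printf("%u\n", scan_goal(60, 300, 16, 65535));
	return 0;
}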
...
@@ -643,11 +681,11 @@ static void rt_run_flush(unsigned long dummy)
 	get_random_bytes(&rt_hash_rnd, 4);

 	for (i = rt_hash_mask; i >= 0; i--) {
-		spin_lock_bh(&rt_hash_table[i].lock);
+		spin_lock_bh(rt_hash_lock_addr(i));
 		rth = rt_hash_table[i].chain;
 		if (rth)
 			rt_hash_table[i].chain = NULL;
-		spin_unlock_bh(&rt_hash_table[i].lock);
+		spin_unlock_bh(rt_hash_lock_addr(i));

 		for (; rth; rth = next) {
 			next = rth->u.rt_next;
...
@@ -780,7 +818,7 @@ static int rt_garbage_collect(void)
 			k = (k + 1) & rt_hash_mask;
 			rthp = &rt_hash_table[k].chain;
-			spin_lock_bh(&rt_hash_table[k].lock);
+			spin_lock_bh(rt_hash_lock_addr(k));
 			while ((rth = *rthp) != NULL) {
 				if (!rt_may_expire(rth, tmo, expire)) {
 					tmo >>= 1;
...
@@ -812,7 +850,7 @@ static int rt_garbage_collect(void)
 				goal--;
 #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
 			}
-			spin_unlock_bh(&rt_hash_table[k].lock);
+			spin_unlock_bh(rt_hash_lock_addr(k));
 			if (goal <= 0)
 				break;
 		}
...
@@ -882,7 +920,7 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
 	rthp = &rt_hash_table[hash].chain;

-	spin_lock_bh(&rt_hash_table[hash].lock);
+	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
 		if (!(rth->u.dst.flags & DST_BALANCED) &&
...
@@ -908,7 +946,7 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
 			rth->u.dst.__use++;
 			dst_hold(&rth->u.dst);
 			rth->u.dst.lastuse = now;
-			spin_unlock_bh(&rt_hash_table[hash].lock);
+			spin_unlock_bh(rt_hash_lock_addr(hash));

 			rt_drop(rt);
 			*rp = rth;
...
@@ -949,7 +987,7 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
 	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
 		int err = arp_bind_neighbour(&rt->u.dst);
 		if (err) {
-			spin_unlock_bh(&rt_hash_table[hash].lock);
+			spin_unlock_bh(rt_hash_lock_addr(hash));

 			if (err != -ENOBUFS) {
 				rt_drop(rt);
...
@@ -990,7 +1028,7 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
 	}
 #endif
 	rt_hash_table[hash].chain = rt;
-	spin_unlock_bh(&rt_hash_table[hash].lock);
+	spin_unlock_bh(rt_hash_lock_addr(hash));
 	*rp = rt;
 	return 0;
 }
...
@@ -1058,7 +1096,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
 {
 	struct rtable **rthp;

-	spin_lock_bh(&rt_hash_table[hash].lock);
+	spin_lock_bh(rt_hash_lock_addr(hash));
 	ip_rt_put(rt);
 	for (rthp = &rt_hash_table[hash].chain; *rthp;
 	     rthp = &(*rthp)->u.rt_next)
...
@@ -1067,7 +1105,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
 			rt_free(rt);
 			break;
 		}
-	spin_unlock_bh(&rt_hash_table[hash].lock);
+	spin_unlock_bh(rt_hash_lock_addr(hash));
 }

 void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
...
@@ -3073,12 +3111,14 @@ __setup("rhash_entries=", set_rhash_entries);
 int __init ip_rt_init(void)
 {
-	int i, order, goal, rc = 0;
+	int rc = 0;

 	rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
 			     (jiffies ^ (jiffies >> 7)));

 #ifdef CONFIG_NET_CLS_ROUTE
+	{
+	int order;
 	for (order = 0;
 	     (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++)
 		/* NOTHING */;
...
@@ -3086,6 +3126,7 @@ int __init ip_rt_init(void)
 	if (!ip_rt_acct)
 		panic("IP: failed to allocate ip_rt_acct\n");
 	memset(ip_rt_acct, 0, PAGE_SIZE << order);
+	}
 #endif

 	ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache",
...
@@ -3096,36 +3137,19 @@ int __init ip_rt_init(void)
 	if (!ipv4_dst_ops.kmem_cachep)
 		panic("IP: failed to allocate ip_dst_cache\n");

-	goal = num_physpages >> (26 - PAGE_SHIFT);
-	if (rhash_entries)
-		goal = (rhash_entries * sizeof(struct rt_hash_bucket)) >> PAGE_SHIFT;
-	for (order = 0; (1UL << order) < goal; order++)
-		/* NOTHING */;
-
-	do {
-		rt_hash_mask = (1UL << order) * PAGE_SIZE /
-			sizeof(struct rt_hash_bucket);
-		while (rt_hash_mask & (rt_hash_mask - 1))
-			rt_hash_mask--;
-		rt_hash_table = (struct rt_hash_bucket *)
-			__get_free_pages(GFP_ATOMIC, order);
-	} while (rt_hash_table == NULL && --order > 0);
-
-	if (!rt_hash_table)
-		panic("Failed to allocate IP route cache hash table\n");
-
-	printk(KERN_INFO "IP: routing cache hash table of %u buckets, %ldKbytes\n",
-	       rt_hash_mask,
-	       (long) (rt_hash_mask * sizeof(struct rt_hash_bucket)) / 1024);
-
-	for (rt_hash_log = 0; (1 << rt_hash_log) != rt_hash_mask; rt_hash_log++)
-		/* NOTHING */;
-
-	rt_hash_mask--;
-	for (i = 0; i <= rt_hash_mask; i++) {
-		spin_lock_init(&rt_hash_table[i].lock);
-		rt_hash_table[i].chain = NULL;
-	}
+	rt_hash_table = (struct rt_hash_bucket *)
+		alloc_large_system_hash("IP route cache",
+					sizeof(struct rt_hash_bucket),
+					rhash_entries,
+					(num_physpages >= 128 * 1024) ?
+						(27 - PAGE_SHIFT) :
+						(29 - PAGE_SHIFT),
+					HASH_HIGHMEM,
+					&rt_hash_log,
+					&rt_hash_mask,
+					0);
+	memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
+	rt_hash_lock_init();

 	ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
 	ip_rt_max_size = (rt_hash_mask + 1) * 16;
...
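The open-coded page-order loop is replaced by the generic boot-time allocator, which hands back the bucket count as a power of two together with its log2 and mask, exactly what the lookup path's `hash & rt_hash_mask` indexing needs. A short sketch of that rounding logic, assuming a simple round-down policy (illustrative, not the allocator's actual sizing heuristics):

#include <stdio.h>

/* Round a requested entry count down to a power of two and derive the
 * (mask, log2) pair that masked indexing requires. */
static unsigned long hash_size(unsigned long entries,
			       unsigned int *log2p, unsigned long *maskp)
{
	unsigned long size = 1;
	unsigned int log2 = 0;

	while ((size << 1) <= entries) {
		size <<= 1;
		log2++;
	}
	*log2p = log2;
	*maskp = size - 1;	/* buckets are indexed with hash & mask */
	return size;
}

int main(void)
{
	unsigned int log2;
	unsigned long mask;
	unsigned long n = hash_size(100000, &log2, &mask);

	printf("%lu buckets, log2=%u, mask=%#lx\n", n, log2, mask);
	return 0;		/* 65536 buckets, log2=16, mask=0xffff */
}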
net/ipv4/tcp.c
...
@@ -615,7 +615,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 			 size_t psize, int flags)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int mss_now;
+	int mss_now, size_goal;
 	int err;
 	ssize_t copied;
 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
...
@@ -628,6 +628,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

 	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+	size_goal = tp->xmit_size_goal;
 	copied = 0;

 	err = -EPIPE;
...
@@ -641,7 +642,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 		int offset = poffset % PAGE_SIZE;
 		int size = min_t(size_t, psize, PAGE_SIZE - offset);

-		if (!sk->sk_send_head || (copy = mss_now - skb->len) <= 0) {
+		if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
...
@@ -652,7 +653,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 				goto wait_for_memory;

 			skb_entail(sk, tp, skb);
-			copy = mss_now;
+			copy = size_goal;
 		}

 		if (copy > size)
...
@@ -693,7 +694,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 		if (!(psize -= copy))
 			goto out;

-		if (skb->len != mss_now || (flags & MSG_OOB))
+		if (skb->len < mss_now || (flags & MSG_OOB))
 			continue;

 		if (forced_push(tp)) {
...
@@ -713,6 +714,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 			goto do_error;

 		mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+		size_goal = tp->xmit_size_goal;
 	}

out:
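These sendpage changes switch the fill target from one MSS to size_goal, a multiple of the MSS sized for a whole TSO frame, so each pass tops the tail buffer up with copy = size_goal - tail_len before starting a new segment. A hedged sketch of that fill loop with a toy buffer model (constants are illustrative):

#include <stdio.h>

#define MSS        1448
#define SIZE_GOAL  (4 * MSS)	/* one TSO frame worth of payload */

/* Append 'len' bytes, packing the tail segment up to SIZE_GOAL
 * before opening a new one. */
static void send_bytes(size_t len)
{
	size_t tail_len = 0;	/* bytes already in the tail segment */

	while (len) {
		size_t copy = SIZE_GOAL - tail_len;

		if (copy == 0) {	/* tail full: "new segment" */
			tail_len = 0;
			copy = SIZE_GOAL;
		}
		if (copy > len)
			copy = len;
		tail_len += copy;
		len -= copy;
		printf("tail now %zu bytes\n", tail_len);
	}
}

int main(void) { send_bytes(20000); return 0; }

This is also why the push test relaxes from `skb->len != mss_now` to `skb->len < mss_now`: a tail that already holds one or more full segments no longer forces a flush.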
...
@@ -754,15 +756,20 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
 static inline int select_size(struct sock *sk, struct tcp_sock *tp)
 {
-	int tmp = tp->mss_cache_std;
+	int tmp = tp->mss_cache;

 	if (sk->sk_route_caps & NETIF_F_SG) {
-		int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
+		if (sk->sk_route_caps & NETIF_F_TSO)
+			tmp = 0;
+		else {
+			int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);

-		if (tmp >= pgbreak &&
-		    tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
-			tmp = pgbreak;
+			if (tmp >= pgbreak &&
+			    tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
+				tmp = pgbreak;
+		}
 	}

 	return tmp;
 }
...
@@ -773,7 +780,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int iovlen, flags;
-	int mss_now;
+	int mss_now, size_goal;
 	int err, copied;
 	long timeo;
...
@@ -792,6 +799,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

 	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+	size_goal = tp->xmit_size_goal;

 	/* Ok commence sending. */
 	iovlen = msg->msg_iovlen;
...
@@ -814,7 +822,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			skb = sk->sk_write_queue.prev;

 			if (!sk->sk_send_head ||
-			    (copy = mss_now - skb->len) <= 0) {
+			    (copy = size_goal - skb->len) <= 0) {

new_segment:
 				/* Allocate new segment. If the interface is SG,
...
@@ -837,7 +845,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 					skb->ip_summed = CHECKSUM_HW;

 				skb_entail(sk, tp, skb);
-				copy = mss_now;
+				copy = size_goal;
 			}

 			/* Try to append data to the end of skb. */
...
@@ -872,11 +880,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 					tcp_mark_push(tp, skb);
 					goto new_segment;
 				} else if (page) {
-					/* If page is cached, align
-					 * offset to L1 cache boundary
-					 */
-					off = (off + L1_CACHE_BYTES - 1) &
-					      ~(L1_CACHE_BYTES - 1);
 					if (off == PAGE_SIZE) {
 						put_page(page);
 						TCP_PAGE(sk) = page = NULL;
...
@@ -937,7 +940,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			if ((seglen -= copy) == 0 && iovlen == 0)
 				goto out;

-			if (skb->len != mss_now || (flags & MSG_OOB))
+			if (skb->len < mss_now || (flags & MSG_OOB))
 				continue;

 			if (forced_push(tp)) {
...
@@ -957,6 +960,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				goto do_error;

 			mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
+			size_goal = tp->xmit_size_goal;
 		}
 	}
...
@@ -2128,7 +2132,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_rto = jiffies_to_usecs(tp->rto);
 	info->tcpi_ato = jiffies_to_usecs(tp->ack.ato);
-	info->tcpi_snd_mss = tp->mss_cache_std;
+	info->tcpi_snd_mss = tp->mss_cache;
 	info->tcpi_rcv_mss = tp->ack.rcv_mss;

 	info->tcpi_unacked = tp->packets_out;
...
@@ -2178,7 +2182,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 	switch (optname) {
 	case TCP_MAXSEG:
-		val = tp->mss_cache_std;
+		val = tp->mss_cache;
 		if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
 			val = tp->rx_opt.user_mss;
 		break;
...
net/ipv4/tcp_input.c
...
@@ -740,10 +740,10 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);

 	if (!cwnd) {
-		if (tp->mss_cache_std > 1460)
+		if (tp->mss_cache > 1460)
 			cwnd = 2;
 		else
-			cwnd = (tp->mss_cache_std > 1095) ? 3 : 4;
+			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
 	}
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
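With mss_cache_std gone, the initial congestion window is picked from mss_cache directly; the thresholds follow the RFC 3390 idea that larger segments get fewer of them, keeping the initial burst roughly constant in bytes. A one-function sketch with a tiny self-check:

#include <assert.h>

/* Initial cwnd in segments as a function of MSS, RFC 3390 style:
 * larger MSS means fewer initial segments for a similar byte count. */
static unsigned int init_cwnd(unsigned int mss)
{
	if (mss > 1460)
		return 2;
	return (mss > 1095) ? 3 : 4;
}

int main(void)
{
	assert(init_cwnd(1460) == 3);	/* typical Ethernet MSS */
	assert(init_cwnd(536)  == 4);	/* small default MSS */
	assert(init_cwnd(8960) == 2);	/* jumbo frames */
	return 0;
}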
...
@@ -914,7 +914,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		if (sk->sk_route_caps & NETIF_F_TSO) {
 			sk->sk_route_caps &= ~NETIF_F_TSO;
 			sock_set_flag(sk, SOCK_NO_LARGESEND);
-			tp->mss_cache = tp->mss_cache_std;
+			tp->mss_cache = tp->mss_cache;
 		}

 	if (!tp->sacked_out)
...
@@ -1077,7 +1077,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		    (IsFack(tp) ||
 		     !before(lost_retrans,
 			     TCP_SKB_CB(skb)->ack_seq + tp->reordering *
-			     tp->mss_cache_std))) {
+			     tp->mss_cache))) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 			tp->retrans_out -= tcp_skb_pcount(skb);
...
@@ -1957,15 +1957,6 @@ static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
 	}
 }

-/* There is one downside to this scheme.  Although we keep the
- * ACK clock ticking, adjusting packet counters and advancing
- * congestion window, we do not liberate socket send buffer
- * space.
- *
- * Mucking with skb->truesize and sk->sk_wmem_alloc et al.
- * then making a write space wakeup callback is a possible
- * future enhancement.  WARNING: it is not trivial to make.
- */
 static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
 			 __u32 now, __s32 *seq_rtt)
 {
...
@@ -2047,7 +2038,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
 		 * the other end.
 		 */
 		if (after(scb->end_seq, tp->snd_una)) {
-			if (tcp_skb_pcount(skb) > 1)
+			if (tcp_skb_pcount(skb) > 1 &&
+			    after(tp->snd_una, scb->seq))
 				acked |= tcp_tso_acked(sk, skb,
 						       now, &seq_rtt);
 			break;
...
@@ -3308,6 +3300,28 @@ void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }

+static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
+{
+	/* If the user specified a specific send buffer setting, do
+	 * not modify it.
+	 */
+	if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
+		return 0;
+
+	/* If we are under global TCP memory pressure, do not expand.  */
+	if (tcp_memory_pressure)
+		return 0;
+
+	/* If we are under soft global TCP memory pressure, do not expand.  */
+	if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+		return 0;
+
+	/* If we filled the congestion window, do not expand.  */
+	if (tp->packets_out >= tp->snd_cwnd)
+		return 0;
+
+	return 1;
+}

 /* When incoming ACK allowed to free some skb from write_queue,
  * we remember this event in flag SOCK_QUEUE_SHRUNK and wake up socket
...
@@ -3319,11 +3333,8 @@ static void tcp_new_space(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);

-	if (tp->packets_out < tp->snd_cwnd &&
-	    !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
-	    !tcp_memory_pressure &&
-	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
-		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache_std) +
+	if (tcp_should_expand_sndbuf(sk, tp)) {
+		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
 			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
 		    demanded = max_t(unsigned int, tp->snd_cwnd,
 						   tp->reordering + 1);
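The buffer target here multiplies a worst-case per-packet memory cost, payload plus headers plus skb bookkeeping, by the demand in packets, the larger of cwnd and the reordering degree. A back-of-envelope sketch of that sizing; the overhead constants and the doubling factor for headroom are assumptions for illustration, not the socket code's exact figures:

#include <stdio.h>

#define MAX_HDR       320	/* assumed worst-case protocol headers */
#define SKB_OVERHEAD  256	/* assumed per-buffer bookkeeping cost */

static unsigned int max_u(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

/* sndbuf target = per-packet cost * packets the window may demand */
static unsigned long sndbuf_target(unsigned int mss_clamp,
				   unsigned int mss_cache,
				   unsigned int snd_cwnd,
				   unsigned int reordering)
{
	unsigned int sndmem = max_u(mss_clamp, mss_cache)
			      + MAX_HDR + 16 + SKB_OVERHEAD;
	unsigned int demanded = max_u(snd_cwnd, reordering + 1);

	return (unsigned long)sndmem * 2 * demanded;	/* 2x for headroom */
}

int main(void)
{
	printf("%lu bytes\n", sndbuf_target(1460, 1448, 40, 3));
	return 0;
}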
...
@@ -3346,22 +3357,9 @@ static inline void tcp_check_space(struct sock *sk)
 	}
 }

-static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) ||
-	    tcp_packets_in_flight(tp) >= tp->snd_cwnd ||
-	    tcp_write_xmit(sk, tp->nonagle))
-		tcp_check_probe_timer(sk, tp);
-}
-
-static __inline__ void tcp_data_snd_check(struct sock *sk)
+static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
 {
-	struct sk_buff *skb = sk->sk_send_head;
-
-	if (skb != NULL)
-		__tcp_data_snd_check(sk, skb);
+	tcp_push_pending_frames(sk, tp);
 	tcp_check_space(sk);
 }
...
@@ -3655,7 +3653,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			 */
 			tcp_ack(sk, skb, 0);
 			__kfree_skb(skb);
-			tcp_data_snd_check(sk);
+			tcp_data_snd_check(sk, tp);
 			return 0;
 		} else { /* Header too small */
 			TCP_INC_STATS_BH(TCP_MIB_INERRS);
...
@@ -3721,7 +3719,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
 				/* Well, only one small jumplet in fast path... */
 				tcp_ack(sk, skb, FLAG_DATA);
-				tcp_data_snd_check(sk);
+				tcp_data_snd_check(sk, tp);
 				if (!tcp_ack_scheduled(tp))
 					goto no_ack;
 			}
...
@@ -3799,7 +3797,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	/* step 7: process the segment text */
 	tcp_data_queue(sk, skb);

-	tcp_data_snd_check(sk);
+	tcp_data_snd_check(sk, tp);
 	tcp_ack_snd_check(sk);
 	return 0;
...
@@ -4109,7 +4107,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		/* Do step6 onward by hand. */
 		tcp_urg(sk, skb, th);
 		__kfree_skb(skb);
-		tcp_data_snd_check(sk);
+		tcp_data_snd_check(sk, tp);
 		return 0;
 	}
...
@@ -4300,7 +4298,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	/* tcp_data could move socket to TIME-WAIT */
 	if (sk->sk_state != TCP_CLOSE) {
-		tcp_data_snd_check(sk);
+		tcp_data_snd_check(sk, tp);
 		tcp_ack_snd_check(sk);
 	}
...
net/ipv4/tcp_ipv4.c
...
@@ -2045,7 +2045,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	 */
 	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
 	tp->snd_cwnd_clamp = ~0;
-	tp->mss_cache_std = tp->mss_cache = 536;
+	tp->mss_cache = 536;

 	tp->reordering = sysctl_tcp_reordering;
 	tp->ca_ops = &tcp_init_congestion_ops;
...
net/ipv4/tcp_output.c
...
@@ -49,7 +49,7 @@ int sysctl_tcp_retrans_collapse = 1;
 * will allow a single TSO frame to consume.  Building TSO frames
 * which are too large can cause TCP streams to be bursty.
 */
-int sysctl_tcp_tso_win_divisor = 8;
+int sysctl_tcp_tso_win_divisor = 3;

 static inline void update_send_head(struct sock *sk, struct tcp_sock *tp,
 				    struct sk_buff *skb)
...
@@ -140,11 +140,11 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp,
 		tp->ack.pingpong = 1;
 }

-static __inline__ void tcp_event_ack_sent(struct sock *sk)
+static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
 {
 	struct tcp_sock *tp = tcp_sk(sk);

-	tcp_dec_quickack_mode(tp);
+	tcp_dec_quickack_mode(tp, pkts);
 	tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
 }
...
@@ -355,7 +355,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 	tp->af_specific->send_check(sk, th, skb->len, skb);

 	if (tcb->flags & TCPCB_FLAG_ACK)
-		tcp_event_ack_sent(sk);
+		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));

 	if (skb->len != tcp_header_size)
 		tcp_event_data_sent(tp, skb, sk);
@@ -403,42 +403,11 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
...
@@ -403,42 +403,11 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
sk
->
sk_send_head
=
skb
;
sk
->
sk_send_head
=
skb
;
}
}
static
inline
void
tcp_tso_set_push
(
struct
sk_buff
*
skb
)
static
void
tcp_set_skb_tso_segs
(
struct
sock
*
sk
,
struct
sk_buff
*
skb
)
{
/* Force push to be on for any TSO frames to workaround
* problems with busted implementations like Mac OS-X that
* hold off socket receive wakeups until push is seen.
*/
if
(
tcp_skb_pcount
(
skb
)
>
1
)
TCP_SKB_CB
(
skb
)
->
flags
|=
TCPCB_FLAG_PSH
;
}
/* Send _single_ skb sitting at the send head. This function requires
* true push pending frames to setup probe timer etc.
*/
void
tcp_push_one
(
struct
sock
*
sk
,
unsigned
cur_mss
)
{
{
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
struct
sk_buff
*
skb
=
sk
->
sk_send_head
;
if
(
tcp_snd_test
(
sk
,
skb
,
cur_mss
,
TCP_NAGLE_PUSH
))
{
if
(
skb
->
len
<=
tp
->
mss_cache
||
/* Send it out now. */
TCP_SKB_CB
(
skb
)
->
when
=
tcp_time_stamp
;
tcp_tso_set_push
(
skb
);
if
(
!
tcp_transmit_skb
(
sk
,
skb_clone
(
skb
,
sk
->
sk_allocation
)))
{
sk
->
sk_send_head
=
NULL
;
tp
->
snd_nxt
=
TCP_SKB_CB
(
skb
)
->
end_seq
;
tcp_packets_out_inc
(
sk
,
tp
,
skb
);
return
;
}
}
}
void
tcp_set_skb_tso_segs
(
struct
sock
*
sk
,
struct
sk_buff
*
skb
)
{
struct
tcp_sock
*
tp
=
tcp_sk
(
sk
);
if
(
skb
->
len
<=
tp
->
mss_cache_std
||
!
(
sk
->
sk_route_caps
&
NETIF_F_TSO
))
{
!
(
sk
->
sk_route_caps
&
NETIF_F_TSO
))
{
/* Avoid the costly divide in the normal
/* Avoid the costly divide in the normal
* non-TSO case.
* non-TSO case.
...
@@ -448,10 +417,10 @@ void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
 	} else {
 		unsigned int factor;

-		factor = skb->len + (tp->mss_cache_std - 1);
-		factor /= tp->mss_cache_std;
+		factor = skb->len + (tp->mss_cache - 1);
+		factor /= tp->mss_cache;
 		skb_shinfo(skb)->tso_segs = factor;
-		skb_shinfo(skb)->tso_size = tp->mss_cache_std;
+		skb_shinfo(skb)->tso_size = tp->mss_cache;
 	}
 }
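The tso_segs factor is just a ceiling division: how many MSS-sized packets the NIC will cut this buffer into on the wire. A tiny sketch of the branch-free form used above:

#include <assert.h>

/* Number of on-the-wire segments a TSO buffer expands to:
 * ceil(len / mss), computed without a conditional. */
static unsigned int tso_segs(unsigned int len, unsigned int mss)
{
	return (len + mss - 1) / mss;
}

int main(void)
{
	assert(tso_segs(1448, 1448) == 1);
	assert(tso_segs(1449, 1448) == 2);
	assert(tso_segs(65160, 1448) == 45);
	return 0;
}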
...
@@ -537,6 +506,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
 	}

 	/* Link BUFF into the send queue. */
+	skb_header_release(buff);
 	__skb_append(skb, buff);

 	return 0;
@@ -657,7 +627,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
...
@@ -657,7 +627,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
/* And store cached results */
/* And store cached results */
tp
->
pmtu_cookie
=
pmtu
;
tp
->
pmtu_cookie
=
pmtu
;
tp
->
mss_cache
=
tp
->
mss_cache_std
=
mss_now
;
tp
->
mss_cache
=
mss_now
;
return
mss_now
;
return
mss_now
;
}
}
...
@@ -669,57 +639,316 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 * cannot be large. However, taking into account rare use of URG, this
 * is not a big flaw.
 */
-unsigned int tcp_current_mss(struct sock *sk, int large)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
-	unsigned int do_large, mss_now;
-
-	mss_now = tp->mss_cache_std;
-	if (dst) {
-		u32 mtu = dst_mtu(dst);
-		if (mtu != tp->pmtu_cookie)
-			mss_now = tcp_sync_mss(sk, mtu);
-	}
-
-	do_large = (large &&
-		    (sk->sk_route_caps & NETIF_F_TSO) &&
-		    !tp->urg_mode);
-
-	if (do_large) {
-		unsigned int large_mss, factor, limit;
-
-		large_mss = 65535 - tp->af_specific->net_header_len -
-			tp->ext_header_len - tp->tcp_header_len;
-
-		if (tp->max_window && large_mss > (tp->max_window>>1))
-			large_mss = max((tp->max_window>>1),
-					68U - tp->tcp_header_len);
-
-		factor = large_mss / mss_now;
-
-		/* Always keep large mss multiple of real mss, but
-		 * do not exceed 1/tso_win_divisor of the congestion window
-		 * so we can keep the ACK clock ticking and minimize
-		 * bursting.
-		 */
-		limit = tp->snd_cwnd;
-		if (sysctl_tcp_tso_win_divisor)
-			limit /= sysctl_tcp_tso_win_divisor;
-		limit = max(1U, limit);
-		if (factor > limit)
-			factor = limit;
-
-		tp->mss_cache = mss_now * factor;
-
-		mss_now = tp->mss_cache;
-	}
-
-	if (tp->rx_opt.eff_sacks)
-		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
-			    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
-	return mss_now;
-}
+unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
+	u32 mss_now;
+	u16 xmit_size_goal;
+	int doing_tso = 0;
+
+	mss_now = tp->mss_cache;
+
+	if (large_allowed &&
+	    (sk->sk_route_caps & NETIF_F_TSO) &&
+	    !tp->urg_mode)
+		doing_tso = 1;
+
+	if (dst) {
+		u32 mtu = dst_mtu(dst);
+		if (mtu != tp->pmtu_cookie)
+			mss_now = tcp_sync_mss(sk, mtu);
+	}
+
+	if (tp->rx_opt.eff_sacks)
+		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
+			    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+
+	xmit_size_goal = mss_now;
+
+	if (doing_tso) {
+		xmit_size_goal = 65535 - tp->af_specific->net_header_len -
+			tp->ext_header_len - tp->tcp_header_len;
+
+		if (tp->max_window &&
+		    (xmit_size_goal > (tp->max_window >> 1)))
+			xmit_size_goal = max((tp->max_window >> 1),
+					     68U - tp->tcp_header_len);
+
+		xmit_size_goal -= (xmit_size_goal % mss_now);
+	}
+	tp->xmit_size_goal = xmit_size_goal;
+
+	return mss_now;
+}
+
+/* Congestion window validation. (RFC2861) */
+
+static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
+{
+	__u32 packets_out = tp->packets_out;
+
+	if (packets_out >= tp->snd_cwnd) {
+		/* Network is feed fully. */
+		tp->snd_cwnd_used = 0;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
+	} else {
+		/* Network starves. */
+		if (tp->packets_out > tp->snd_cwnd_used)
+			tp->snd_cwnd_used = tp->packets_out;
+
+		if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
+			tcp_cwnd_application_limited(sk);
+	}
+}
+
+static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd)
+{
+	u32 window, cwnd_len;
+
+	window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
+	cwnd_len = mss_now * cwnd;
+	return min(window, cwnd_len);
+}
+
+/* Can at least one segment of SKB be sent right now, according to the
+ * congestion window rules?  If so, return how many segments are allowed.
+ */
+static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	u32 in_flight, cwnd;
+
+	/* Don't be strict about the congestion window for the final FIN.  */
+	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
+		return 1;
+
+	in_flight = tcp_packets_in_flight(tp);
+	cwnd = tp->snd_cwnd;
+	if (in_flight < cwnd)
+		return (cwnd - in_flight);
+
+	return 0;
+}
+
+/* This must be invoked the first time we consider transmitting
+ * SKB onto the wire.
+ */
+static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb)
+{
+	int tso_segs = tcp_skb_pcount(skb);
+
+	if (!tso_segs) {
+		tcp_set_skb_tso_segs(sk, skb);
+		tso_segs = tcp_skb_pcount(skb);
+	}
+	return tso_segs;
+}
+
+static inline int tcp_minshall_check(const struct tcp_sock *tp)
+{
+	return after(tp->snd_sml, tp->snd_una) &&
+		!after(tp->snd_sml, tp->snd_nxt);
+}
+
+/* Return 0, if packet can be sent now without violation Nagle's rules:
+ * 1. It is full sized.
+ * 2. Or it contains FIN. (already checked by caller)
+ * 3. Or TCP_NODELAY was set.
+ * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
+ *    With Minshall's modification: all sent small packets are ACKed.
+ */
+
+static inline int tcp_nagle_check(const struct tcp_sock *tp,
+				  const struct sk_buff *skb,
+				  unsigned mss_now, int nonagle)
+{
+	return (skb->len < mss_now &&
+		((nonagle&TCP_NAGLE_CORK) ||
+		 (!nonagle &&
+		  tp->packets_out &&
+		  tcp_minshall_check(tp))));
+}
+
+/* Return non-zero if the Nagle test allows this packet to be
+ * sent now.
+ */
+static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
+				 unsigned int cur_mss, int nonagle)
+{
+	/* Nagle rule does not apply to frames, which sit in the middle of the
+	 * write_queue (they have no chances to get new data).
+	 *
+	 * This is implemented in the callers, where they modify the 'nonagle'
+	 * argument based upon the location of SKB in the send queue.
+	 */
+	if (nonagle & TCP_NAGLE_PUSH)
+		return 1;
+
+	/* Don't use the nagle rule for urgent data (or for the final FIN).  */
+	if (tp->urg_mode ||
+	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
+		return 1;
+
+	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
+		return 1;
+
+	return 0;
+}
+
+/* Does at least the first segment of SKB fit into the send window? */
+static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss)
+{
+	u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+	if (skb->len > cur_mss)
+		end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
+
+	return !after(end_seq, tp->snd_una + tp->snd_wnd);
+}
+
+/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
+ * should be put on the wire right now.  If so, it returns the number of
+ * packets allowed by the congestion window.
+ */
+static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
+				 unsigned int cur_mss, int nonagle)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned int cwnd_quota;
+
+	tcp_init_tso_segs(sk, skb);
+
+	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
+		return 0;
+
+	cwnd_quota = tcp_cwnd_test(tp, skb);
+	if (cwnd_quota &&
+	    !tcp_snd_wnd_test(tp, skb, cur_mss))
+		cwnd_quota = 0;
+
+	return cwnd_quota;
+}
+
+static inline int tcp_skb_is_last(const struct sock *sk, const struct sk_buff *skb)
+{
+	return skb->next == (struct sk_buff *)&sk->sk_write_queue;
+}
+
+int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
+{
+	struct sk_buff *skb = sk->sk_send_head;
+
+	return (skb &&
+		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
+			     (tcp_skb_is_last(sk, skb) ?
+			      TCP_NAGLE_PUSH :
+			      tp->nonagle)));
+}
+
+/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
+ * which is put after SKB on the list.  It is very much like
+ * tcp_fragment() except that it may make several kinds of assumptions
+ * in order to speed up the splitting operation.  In particular, we
+ * know that all the data is in scatter-gather pages, and that the
+ * packet has never been sent out before (and thus is not cloned).
+ */
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
+{
+	struct sk_buff *buff;
+	int nlen = skb->len - len;
+	u16 flags;
+
+	/* All of a TSO frame must be composed of paged data.  */
+	BUG_ON(skb->len != skb->data_len);
+
+	buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
+	if (unlikely(buff == NULL))
+		return -ENOMEM;
+
+	buff->truesize = nlen;
+	skb->truesize -= nlen;
+
+	/* Correct the sequence numbers. */
+	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
+	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
+
+	/* PSH and FIN should only be set in the second packet. */
+	flags = TCP_SKB_CB(skb)->flags;
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+	TCP_SKB_CB(buff)->flags = flags;
+
+	/* This packet was never sent out yet, so no SACK bits. */
+	TCP_SKB_CB(buff)->sacked = 0;
+
+	buff->ip_summed = skb->ip_summed = CHECKSUM_HW;
+	skb_split(skb, buff, len);
+
+	/* Fix up tso_factor for both original and new SKB.  */
+	tcp_set_skb_tso_segs(sk, skb);
+	tcp_set_skb_tso_segs(sk, buff);
+
+	/* Link BUFF into the send queue. */
+	skb_header_release(buff);
+	__skb_append(skb, buff);
+
+	return 0;
+}
+
+/* Try to defer sending, if possible, in order to minimize the amount
+ * of TSO splitting we do.  View it as a kind of TSO Nagle test.
+ *
+ * This algorithm is from John Heffner.
+ */
+static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+{
+	u32 send_win, cong_win, limit, in_flight;
+
+	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
+		return 0;
+
+	if (tp->ca_state != TCP_CA_Open)
+		return 0;
+
+	in_flight = tcp_packets_in_flight(tp);
+
+	BUG_ON(tcp_skb_pcount(skb) <= 1 ||
+	       (tp->snd_cwnd <= in_flight));
+
+	send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq;
+
+	/* From in_flight test above, we know that cwnd > in_flight.  */
+	cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
+
+	limit = min(send_win, cong_win);
+
+	/* If sk_send_head can be sent fully now, just do it.  */
+	if (skb->len <= limit)
+		return 0;
+
+	if (sysctl_tcp_tso_win_divisor) {
+		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
+
+		/* If at least some fraction of a window is available,
+		 * just use it.
+		 */
+		chunk /= sysctl_tcp_tso_win_divisor;
+		if (limit >= chunk)
+			return 0;
+	} else {
+		/* Different approach, try not to defer past a single
+		 * ACK.  Receiver should ACK every other full sized
+		 * frame, so if we have space for more than 3 frames
+		 * then send now.
+		 */
+		if (limit > tcp_max_burst(tp) * tp->mss_cache)
+			return 0;
+	}
+
+	/* Ok, it looks like it is advisable to defer.  */
+	return 1;
+}

 /* This routine writes packets to the network.  It advances the
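The Nagle/Minshall test introduced above is a pure predicate over a little connection state: hold a sub-MSS segment back only while corked, or while data is in flight and a previously sent small segment is still unacknowledged. A standalone sketch with a tiny self-test; the struct fields are simplified stand-ins for the socket state, not the kernel's layout:

#include <assert.h>
#include <stdbool.h>

struct conn {
	unsigned int packets_out;	/* segments in flight */
	bool small_unacked;		/* an unACKed sub-MSS segment exists */
	bool cork;			/* TCP_CORK set */
	bool nodelay;			/* TCP_NODELAY set */
};

/* Nagle with Minshall's modification. */
static bool nagle_allows_send(const struct conn *c,
			      unsigned int len, unsigned int mss)
{
	if (len >= mss || c->nodelay)
		return true;			/* full sized or NODELAY */
	if (c->cork)
		return false;			/* corked: always coalesce */
	return !(c->packets_out && c->small_unacked);
}

int main(void)
{
	struct conn c = { .packets_out = 1, .small_unacked = true };

	assert(!nagle_allows_send(&c, 100, 1448));	/* held back */
	c.small_unacked = false;
	assert(nagle_allows_send(&c, 100, 1448));	/* Minshall: ok */
	return 0;
}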
...
@@ -729,57 +958,158 @@ unsigned int tcp_current_mss(struct sock *sk, int large)
 * Returns 1, if no segments are in flight and we have queued segments, but
 * cannot send anything now because of SWS or another problem.
 */
-int tcp_write_xmit(struct sock *sk, int nonagle)
+static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int mss_now;
+	struct sk_buff *skb;
+	unsigned int tso_segs, sent_pkts;
+	int cwnd_quota;

 	/* If we are closed, the bytes will have to remain here.
 	 * In time closedown will finish, we empty the write queue and all
 	 * will be happy.
 	 */
-	if (sk->sk_state != TCP_CLOSE) {
-		struct sk_buff *skb;
-		int sent_pkts = 0;
-
-		/* Account for SACKS, we may need to fragment due to this.
-		 * It is just like the real MSS changing on us midstream.
-		 * We also handle things correctly when the user adds some
-		 * IP options mid-stream.  Silly to do, but cover it.
-		 */
-		mss_now = tcp_current_mss(sk, 1);
-
-		while ((skb = sk->sk_send_head) &&
-		       tcp_snd_test(sk, skb, mss_now,
-				    tcp_skb_is_last(sk, skb) ? nonagle :
-							       TCP_NAGLE_PUSH)) {
-			if (skb->len > mss_now) {
-				if (tcp_fragment(sk, skb, mss_now))
-					break;
-			}
-
-			TCP_SKB_CB(skb)->when = tcp_time_stamp;
-			tcp_tso_set_push(skb);
-			if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))
-				break;
-
-			/* Advance the send_head.  This one is sent out.
-			 * This call will increment packets_out.
-			 */
-			update_send_head(sk, tp, skb);
-
-			tcp_minshall_update(tp, mss_now, skb);
-			sent_pkts = 1;
-		}
-
-		if (sent_pkts) {
-			tcp_cwnd_validate(sk, tp);
-			return 0;
-		}
-
-		return !tp->packets_out && sk->sk_send_head;
-	}
-	return 0;
-}
+	if (unlikely(sk->sk_state == TCP_CLOSE))
+		return 0;
+
+	skb = sk->sk_send_head;
+	if (unlikely(!skb))
+		return 0;
+
+	tso_segs = tcp_init_tso_segs(sk, skb);
+	cwnd_quota = tcp_cwnd_test(tp, skb);
+	if (unlikely(!cwnd_quota))
+		goto out;
+
+	sent_pkts = 0;
+	while (likely(tcp_snd_wnd_test(tp, skb, mss_now))) {
+		BUG_ON(!tso_segs);
+
+		if (tso_segs == 1) {
+			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
+						     (tcp_skb_is_last(sk, skb) ?
+						      nonagle : TCP_NAGLE_PUSH))))
+				break;
+		} else {
+			if (tcp_tso_should_defer(sk, tp, skb))
+				break;
+		}
+
+		if (tso_segs > 1) {
+			u32 limit = tcp_window_allows(tp, skb,
+						      mss_now, cwnd_quota);
+
+			if (skb->len < limit) {
+				unsigned int trim = skb->len % mss_now;
+
+				if (trim)
+					limit = skb->len - trim;
+			}
+			if (skb->len > limit) {
+				if (tso_fragment(sk, skb, limit))
+					break;
+			}
+		} else if (unlikely(skb->len > mss_now)) {
+			if (unlikely(tcp_fragment(sk, skb, mss_now)))
+				break;
+		}
+
+		TCP_SKB_CB(skb)->when = tcp_time_stamp;
+		if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
+			break;
+
+		/* Advance the send_head.  This one is sent out.
+		 * This call will increment packets_out.
+		 */
+		update_send_head(sk, tp, skb);
+
+		tcp_minshall_update(tp, mss_now, skb);
+		sent_pkts++;
+
+		/* Do not optimize this to use tso_segs. If we chopped up
+		 * the packet above, tso_segs will no longer be valid.
+		 */
+		cwnd_quota -= tcp_skb_pcount(skb);
+
+		BUG_ON(cwnd_quota < 0);
+		if (!cwnd_quota)
+			break;
+
+		skb = sk->sk_send_head;
+		if (!skb)
+			break;
+		tso_segs = tcp_init_tso_segs(sk, skb);
+	}
+
+	if (likely(sent_pkts)) {
+		tcp_cwnd_validate(sk, tp);
+		return 0;
+	}
+out:
+	return !tp->packets_out && sk->sk_send_head;
+}
+
+/* Push out any pending frames which were held back due to
+ * TCP_CORK or attempt at coalescing tiny packets.
+ * The socket must be locked by the caller.
+ */
+void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
+			       unsigned int cur_mss, int nonagle)
+{
+	struct sk_buff *skb = sk->sk_send_head;
+
+	if (skb) {
+		if (tcp_write_xmit(sk, cur_mss, nonagle))
+			tcp_check_probe_timer(sk, tp);
+	}
+}
+
+/* Send _single_ skb sitting at the send head. This function requires
+ * true push pending frames to setup probe timer etc.
+ */
+void tcp_push_one(struct sock *sk, unsigned int mss_now)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb = sk->sk_send_head;
+	unsigned int tso_segs, cwnd_quota;
+
+	BUG_ON(!skb || skb->len < mss_now);
+
+	tso_segs = tcp_init_tso_segs(sk, skb);
+	cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
+
+	if (likely(cwnd_quota)) {
+		BUG_ON(!tso_segs);
+
+		if (tso_segs > 1) {
+			u32 limit = tcp_window_allows(tp, skb,
+						      mss_now, cwnd_quota);
+
+			if (skb->len < limit) {
+				unsigned int trim = skb->len % mss_now;
+
+				if (trim)
+					limit = skb->len - trim;
+			}
+			if (skb->len > limit) {
+				if (unlikely(tso_fragment(sk, skb, limit)))
+					return;
+			}
+		} else if (unlikely(skb->len > mss_now)) {
+			if (unlikely(tcp_fragment(sk, skb, mss_now)))
+				return;
+		}
+
+		/* Send it out now. */
+		TCP_SKB_CB(skb)->when = tcp_time_stamp;
+
+		if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) {
+			update_send_head(sk, tp, skb);
+			tcp_cwnd_validate(sk, tp);
+			return;
+		}
+	}
+}

 /* This function returns the amount that we can raise the
...
@@ -1039,7 +1369,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
...
@@ -1039,7 +1369,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (sk->sk_route_caps & NETIF_F_TSO) {
 		sk->sk_route_caps &= ~NETIF_F_TSO;
 		sock_set_flag(sk, SOCK_NO_LARGESEND);
-		tp->mss_cache = tp->mss_cache_std;
 	}
 
 	if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
...
@@ -1101,7 +1430,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 * is still in somebody's hands, else make a clone.
 	 */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_tso_set_push(skb);
 
 	err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
 				    pskb_copy(skb, GFP_ATOMIC) :
@@ -1670,14 +1998,12 @@ int tcp_write_wakeup(struct sock *sk)
...
@@ -1670,14 +1998,12 @@ int tcp_write_wakeup(struct sock *sk)
 			if (sk->sk_route_caps & NETIF_F_TSO) {
 				sock_set_flag(sk, SOCK_NO_LARGESEND);
 				sk->sk_route_caps &= ~NETIF_F_TSO;
-				tp->mss_cache = tp->mss_cache_std;
 			}
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(sk, skb);
 
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
-		tcp_tso_set_push(skb);
 		err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
 		if (!err) {
 			update_send_head(sk, tp, skb);
...

net/ipv6/af_inet6.c
...
@@ -774,7 +774,6 @@ static int __init inet6_init(void)
 	if (if6_proc_init())
 		goto proc_if6_fail;
 #endif
-	ipv6_packet_init();
 	ip6_route_init();
 	ip6_flowlabel_init();
 	err = addrconf_init();
...
@@ -791,6 +790,8 @@ static int __init inet6_init(void)
 	/* Init v6 transport protocols. */
 	udpv6_init();
 	tcpv6_init();
 
+	ipv6_packet_init();
+
 	err = 0;
 out:
 	return err;
...
@@ -798,7 +799,6 @@ static int __init inet6_init(void)
 addrconf_fail:
 	ip6_flowlabel_cleanup();
 	ip6_route_cleanup();
-	ipv6_packet_cleanup();
 #ifdef CONFIG_PROC_FS
 	if6_proc_exit();
 proc_if6_fail:
...

net/ipv6/ip6_output.c
...
@@ -465,7 +465,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->pkt_type = from->pkt_type;
 	to->priority = from->priority;
 	to->protocol = from->protocol;
-	to->security = from->security;
 	dst_release(to->dst);
 	to->dst = dst_clone(from->dst);
 	to->dev = from->dev;
...

net/ipv6/tcp_ipv6.c
...
@@ -2018,7 +2018,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	 */
 	tp->snd_ssthresh = 0x7fffffff;
 	tp->snd_cwnd_clamp = ~0;
-	tp->mss_cache_std = tp->mss_cache = 536;
+	tp->mss_cache = 536;
 	tp->reordering = sysctl_tcp_reordering;
...

net/sched/Makefile
...
@@ -4,7 +4,7 @@
 obj-y	:= sch_generic.o
 
-obj-$(CONFIG_NET_SCHED)		+= sch_api.o sch_fifo.o
+obj-$(CONFIG_NET_SCHED)		+= sch_api.o sch_fifo.o sch_blackhole.o
 obj-$(CONFIG_NET_CLS)		+= cls_api.o
 obj-$(CONFIG_NET_CLS_ACT)	+= act_api.o
 obj-$(CONFIG_NET_ACT_POLICE)	+= police.o
...

net/sched/em_meta.c
...
@@ -205,11 +205,6 @@ META_COLLECTOR(int_protocol)
 	dst->value = skb->protocol;
 }
 
-META_COLLECTOR(int_security)
-{
-	dst->value = skb->security;
-}
-
 META_COLLECTOR(int_pkttype)
 {
 	dst->value = skb->pkt_type;
...
@@ -524,7 +519,6 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
 		[META_ID(REALDEV)]		= META_FUNC(int_realdev),
 		[META_ID(PRIORITY)]		= META_FUNC(int_priority),
 		[META_ID(PROTOCOL)]		= META_FUNC(int_protocol),
-		[META_ID(SECURITY)]		= META_FUNC(int_security),
 		[META_ID(PKTTYPE)]		= META_FUNC(int_pkttype),
 		[META_ID(PKTLEN)]		= META_FUNC(int_pktlen),
 		[META_ID(DATALEN)]		= META_FUNC(int_datalen),
...

net/sched/sch_api.c
...
@@ -399,10 +399,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
 {
 	int err;
 	struct rtattr *kind = tca[TCA_KIND-1];
-	void *p = NULL;
 	struct Qdisc *sch;
 	struct Qdisc_ops *ops;
-	int size;
 
 	ops = qdisc_lookup_ops(kind);
 #ifdef CONFIG_KMOD
...
@@ -437,64 +435,55 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
 	if (ops == NULL)
 		goto err_out;
 
-	/* ensure that the Qdisc and the private data are 32-byte aligned */
-	size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
-	size += ops->priv_size + QDISC_ALIGN_CONST;
-
-	p = kmalloc(size, GFP_KERNEL);
-	err = -ENOBUFS;
-	if (!p)
+	sch = qdisc_alloc(dev, ops);
+	if (IS_ERR(sch)) {
+		err = PTR_ERR(sch);
 		goto err_out2;
-	memset(p, 0, size);
-	sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
-			       & ~QDISC_ALIGN_CONST);
-	sch->padded = (char *)sch - (char *)p;
-
-	INIT_LIST_HEAD(&sch->list);
-	skb_queue_head_init(&sch->q);
+	}
 
-	if (handle == TC_H_INGRESS)
+	if (handle == TC_H_INGRESS) {
 		sch->flags |= TCQ_F_INGRESS;
-
-	sch->ops = ops;
-	sch->enqueue = ops->enqueue;
-	sch->dequeue = ops->dequeue;
-	sch->dev = dev;
-	dev_hold(dev);
-	atomic_set(&sch->refcnt, 1);
-	sch->stats_lock = &dev->queue_lock;
-	if (handle == 0) {
+		handle = TC_H_MAKE(TC_H_INGRESS, 0);
+	} else if (handle == 0) {
 		handle = qdisc_alloc_handle(dev);
 		err = -ENOMEM;
 		if (handle == 0)
 			goto err_out3;
 	}
 
-	if (handle == TC_H_INGRESS)
-		sch->handle = TC_H_MAKE(TC_H_INGRESS, 0);
-	else
-		sch->handle = handle;
+	sch->handle = handle;
 
 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
+#ifdef CONFIG_NET_ESTIMATOR
+		if (tca[TCA_RATE-1]) {
+			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
+						sch->stats_lock,
+						tca[TCA_RATE-1]);
+			if (err) {
+				/*
+				 * Any broken qdiscs that would require
+				 * a ops->reset() here? The qdisc was never
+				 * in action so it shouldn't be necessary.
+				 */
+				if (ops->destroy)
+					ops->destroy(sch);
+				goto err_out3;
+			}
+		}
+#endif
 		qdisc_lock_tree(dev);
 		list_add_tail(&sch->list, &dev->qdisc_list);
 		qdisc_unlock_tree(dev);
 
-#ifdef CONFIG_NET_ESTIMATOR
-		if (tca[TCA_RATE-1])
-			gen_new_estimator(&sch->bstats, &sch->rate_est,
-					  sch->stats_lock, tca[TCA_RATE-1]);
-#endif
 		return sch;
 	}
 err_out3:
 	dev_put(dev);
+	kfree((char *) sch - sch->padded);
 err_out2:
 	module_put(ops->owner);
 err_out:
 	*errp = err;
-	if (p)
-		kfree(p);
 	return NULL;
 }
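The reordering above is the substantive change: gen_new_estimator() now runs, and may fail, before the qdisc is linked into dev->qdisc_list, so a qdisc only becomes visible once every fallible setup step has succeeded. A minimal model of that construct-then-publish ordering (every helper name here is a hypothetical stand-in for gen_new_estimator(), ops->destroy() and list_add_tail()):

	#include <assert.h>
	#include <errno.h>
	#include <stdbool.h>

	static int  setup_estimator(bool ok) { return ok ? 0 : -ENOBUFS; }
	static void destroy_private(void)    { /* undo ops->init() */ }
	static void publish(void)            { /* link into the device's list */ }

	static int create(bool estimator_ok)
	{
		int err = setup_estimator(estimator_ok);

		if (err) {
			/* The object never became reachable, so a plain
			 * destroy is enough: no unlink, no reset. */
			destroy_private();
			return err;
		}
		publish();	/* last step: the object is now visible */
		return 0;
	}

	int main(void)
	{
		assert(create(true) == 0);
		assert(create(false) == -ENOBUFS);
		return 0;
	}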
...

net/sched/sch_blackhole.c (new file, mode 100644)
/*
 * net/sched/sch_blackhole.c	Black hole queue
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Thomas Graf <tgraf@suug.ch>
 *
 * Note: Quantum tunneling is not supported.
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>

static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	qdisc_drop(skb, sch);
	return NET_XMIT_SUCCESS;
}

static struct sk_buff *blackhole_dequeue(struct Qdisc *sch)
{
	return NULL;
}

static struct Qdisc_ops blackhole_qdisc_ops = {
	.id		= "blackhole",
	.priv_size	= 0,
	.enqueue	= blackhole_enqueue,
	.dequeue	= blackhole_dequeue,
	.owner		= THIS_MODULE,
};

static int __init blackhole_module_init(void)
{
	return register_qdisc(&blackhole_qdisc_ops);
}

static void __exit blackhole_module_exit(void)
{
	unregister_qdisc(&blackhole_qdisc_ops);
}

module_init(blackhole_module_init)
module_exit(blackhole_module_exit)

MODULE_LICENSE("GPL");
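Once the module is registered, the qdisc should be attachable with the usual tc front end; a plausible invocation (the device name is only an example) would be:

	# Silently drop everything queued to eth0: every enqueue above
	# calls qdisc_drop() and dequeue never returns a packet.
	tc qdisc add dev eth0 root blackhole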
net/sched/sch_generic.c
...
@@ -395,24 +395,23 @@ static struct Qdisc_ops pfifo_fast_ops = {
 	.owner		=	THIS_MODULE,
 };
 
-struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
+struct Qdisc * qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
 {
 	void *p;
 	struct Qdisc *sch;
-	int size;
+	unsigned int size;
+	int err = -ENOBUFS;
 
 	/* ensure that the Qdisc and the private data are 32-byte aligned */
-	size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
-	size += ops->priv_size + QDISC_ALIGN_CONST;
+	size = QDISC_ALIGN(sizeof(*sch));
+	size += ops->priv_size + (QDISC_ALIGNTO - 1);
 
 	p = kmalloc(size, GFP_KERNEL);
 	if (!p)
-		return NULL;
+		goto errout;
 	memset(p, 0, size);
-	sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
-			       & ~QDISC_ALIGN_CONST);
+	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
 	sch->padded = (char *) sch - (char *) p;
 
 	INIT_LIST_HEAD(&sch->list);
 	skb_queue_head_init(&sch->q);
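QDISC_ALIGN() rounds a size up to the next QDISC_ALIGNTO boundary, and the extra QDISC_ALIGNTO - 1 bytes added to the allocation guarantee that an aligned Qdisc plus its private area always fit inside the kmalloc'd block, wherever kmalloc happens to place it. A userspace check of that arithmetic (assuming QDISC_ALIGNTO is 32, as this patch set defines it in pkt_sched.h):

	#include <assert.h>

	#define QDISC_ALIGNTO		32
	#define QDISC_ALIGN(len)	(((len) + QDISC_ALIGNTO - 1) & \
					 ~(QDISC_ALIGNTO - 1))

	int main(void)
	{
		assert(QDISC_ALIGN(1)  == 32);	/* rounds up */
		assert(QDISC_ALIGN(32) == 32);	/* already aligned */
		assert(QDISC_ALIGN(33) == 64);	/* next boundary */
		return 0;
	}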
...
@@ -423,11 +422,24 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
 	dev_hold(dev);
 	sch->stats_lock = &dev->queue_lock;
 	atomic_set(&sch->refcnt, 1);
+
+	return sch;
+errout:
+	return ERR_PTR(-err);
+}
+
+struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
+{
+	struct Qdisc *sch;
+
+	sch = qdisc_alloc(dev, ops);
+	if (IS_ERR(sch))
+		goto errout;
 
 	if (!ops->init || ops->init(sch, NULL) == 0)
 		return sch;
 
-	dev_put(dev);
-	kfree(p);
+errout:
 	return NULL;
 }
...
@@ -591,6 +603,7 @@ EXPORT_SYMBOL(__netdev_watchdog_up);
 EXPORT_SYMBOL(noop_qdisc);
 EXPORT_SYMBOL(noop_qdisc_ops);
 EXPORT_SYMBOL(qdisc_create_dflt);
+EXPORT_SYMBOL(qdisc_alloc);
 EXPORT_SYMBOL(qdisc_destroy);
 EXPORT_SYMBOL(qdisc_reset);
 EXPORT_SYMBOL(qdisc_restart);