提交 27b75c95 编写于 作者: E Eric Dumazet 提交者: David S. Miller

net: avoid RCU for NOCACHE dst

There is no point in using RCU for a dst that we allocate for a very short
time (used once).

Change dst_release() to take DST_NOCACHE into account, but also change
skb_dst_set_noref() to force a refcount increment for such dst.

This is a _huge_ gain, because we don't waste memory storing xx thousand
dsts. Instead of queueing them to RCU, we can free them instantly.

CPU caches can stay hot, re-using same memory blocks to hold temporary
dsts.

Note: remove the unneeded smp_mb__before_atomic_dec() in dst_release(),
since atomic_dec_return() implies a full memory barrier.

Stress test: 160,000,000 UDP frames sent, IP route cache disabled
(DDOS).

Before:

real    0m38.091s
user    0m13.189s
sys     7m53.018s

After:

real	0m29.946s
user	0m12.157s
sys	7m40.605s

For reference, if IP route cache was enabled :

real	0m32.030s
user	0m10.521s
sys	8m15.243s
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 e6484930
...@@ -460,19 +460,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) ...@@ -460,19 +460,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
skb->_skb_refdst = (unsigned long)dst; skb->_skb_refdst = (unsigned long)dst;
} }
/** extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst);
* skb_dst_set_noref - sets skb dst, without a reference
* @skb: buffer
* @dst: dst entry
*
* Sets skb dst, assuming a reference was not taken on dst
* skb_dst_drop() should not dst_release() this dst
*/
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
{
	unsigned long refdst = (unsigned long)dst;

	/* Warn when both rcu_read_lock_held() and rcu_read_lock_bh_held()
	 * return false.
	 */
	WARN_ON(!(rcu_read_lock_held() || rcu_read_lock_bh_held()));

	/* Store the dst pointer tagged with the SKB_DST_NOREF bit. */
	skb->_skb_refdst = refdst | SKB_DST_NOREF;
}
/** /**
* skb_dst_is_noref - Test if skb dst isnt refcounted * skb_dst_is_noref - Test if skb dst isnt refcounted
......
...@@ -271,13 +271,40 @@ void dst_release(struct dst_entry *dst) ...@@ -271,13 +271,40 @@ void dst_release(struct dst_entry *dst)
if (dst) { if (dst) {
int newrefcnt; int newrefcnt;
smp_mb__before_atomic_dec();
newrefcnt = atomic_dec_return(&dst->__refcnt); newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0); WARN_ON(newrefcnt < 0);
if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
dst = dst_destroy(dst);
if (dst)
__dst_free(dst);
}
} }
} }
EXPORT_SYMBOL(dst_release); EXPORT_SYMBOL(dst_release);
/**
 * skb_dst_set_noref - attach a dst to an skb, normally without a reference
 * @skb: buffer
 * @dst: dst entry
 *
 * For a dst that does not carry DST_NOCACHE, the pointer is stored tagged
 * with SKB_DST_NOREF and no reference is taken.
 *
 * For a DST_NOCACHE dst, a reference is taken with dst_hold() and the dst
 * is attached through skb_dst_set(), because dst_release() destroys such a
 * dst as soon as its refcount becomes zero.
 */
void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
{
	/* Warn when both rcu_read_lock_held() and rcu_read_lock_bh_held()
	 * return false.
	 */
	WARN_ON(!(rcu_read_lock_held() || rcu_read_lock_bh_held()));

	/* Uncached dst: hold a real reference and attach it refcounted. */
	if (unlikely(dst->flags & DST_NOCACHE)) {
		dst_hold(dst);
		skb_dst_set(skb, dst);
		return;
	}

	/* Otherwise store the pointer tagged as not refcounted. */
	skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
}
EXPORT_SYMBOL(skb_dst_set_noref);
/* Dirty hack. We did it in 2.2 (in __dst_free), /* Dirty hack. We did it in 2.2 (in __dst_free),
* we have _very_ good reasons not to repeat * we have _very_ good reasons not to repeat
* this mistake in 2.3, but we have no choice * this mistake in 2.3, but we have no choice
......
...@@ -1105,9 +1105,9 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, ...@@ -1105,9 +1105,9 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt,
* Note that we do rt_free on this new route entry, so that * Note that we do rt_free on this new route entry, so that
* once its refcount hits zero, we are still able to reap it * once its refcount hits zero, we are still able to reap it
* (Thanks Alexey) * (Thanks Alexey)
* Note also the rt_free uses call_rcu. We don't actually * Note: To avoid expensive rcu stuff for this uncached dst,
* need rcu protection here, this is just our path to get * we set DST_NOCACHE so that dst_release() can free dst without
* on the route gc list. * waiting a grace period.
*/ */
rt->dst.flags |= DST_NOCACHE; rt->dst.flags |= DST_NOCACHE;
...@@ -1117,12 +1117,11 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, ...@@ -1117,12 +1117,11 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt,
if (net_ratelimit()) if (net_ratelimit())
printk(KERN_WARNING printk(KERN_WARNING
"Neighbour table failure & not caching routes.\n"); "Neighbour table failure & not caching routes.\n");
rt_drop(rt); ip_rt_put(rt);
return err; return err;
} }
} }
rt_free(rt);
goto skip_hashing; goto skip_hashing;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册