提交 bf6a119e 编写于 作者: David S. Miller

Merge branch 'ipv6_dev_get_saddr-rcu'

Eric Dumazet says:

====================
ipv6: ipv6_dev_get_saddr() rcu works

Sending IPv6 UDP packets on non-connected sockets is quite slow,
because ipv6_dev_get_saddr() is still using an rwlock and silly
reference games on ifa.

Tested:

$ ./super_netperf 16 -H 4444::555:0786 -l 2000 -t UDP_STREAM -- -m 100 &
[1] 12527

Performance is boosted from 2.02 Mpps to 4.28 Mpps

Kernel profile before patches :
  22.62%  [kernel]  [k] _raw_read_lock_bh
   7.04%  [kernel]  [k] refcount_sub_and_test
   6.56%  [kernel]  [k] ipv6_get_saddr_eval
   5.67%  [kernel]  [k] _raw_read_unlock_bh
   5.34%  [kernel]  [k] __ipv6_dev_get_saddr
   4.95%  [kernel]  [k] refcount_inc_not_zero
   4.03%  [kernel]  [k] __ip6addrlbl_match
   3.70%  [kernel]  [k] _raw_spin_lock
   3.44%  [kernel]  [k] ipv6_dev_get_saddr
   3.24%  [kernel]  [k] ip6_pol_route
   3.06%  [kernel]  [k] refcount_add_not_zero
   2.30%  [kernel]  [k] __local_bh_enable_ip
   1.81%  [kernel]  [k] mlx4_en_xmit
   1.20%  [kernel]  [k] __ip6_append_data
   1.12%  [kernel]  [k] __ip6_make_skb
   1.11%  [kernel]  [k] __dev_queue_xmit
   1.06%  [kernel]  [k] l3mdev_master_ifindex_rcu

Kernel profile after patches :
  11.36%  [kernel]  [k] ip6_pol_route
   7.65%  [kernel]  [k] _raw_spin_lock
   7.16%  [kernel]  [k] __ipv6_dev_get_saddr
   6.49%  [kernel]  [k] ipv6_get_saddr_eval
   6.04%  [kernel]  [k] refcount_add_not_zero
   3.34%  [kernel]  [k] __ip6addrlbl_match
   2.62%  [kernel]  [k] __dev_queue_xmit
   2.37%  [kernel]  [k] mlx4_en_xmit
   2.26%  [kernel]  [k] dst_release
   1.89%  [kernel]  [k] __ip6_make_skb
   1.87%  [kernel]  [k] __ip6_append_data
   1.86%  [kernel]  [k] udpv6_sendmsg
   1.86%  [kernel]  [k] ip6t_do_table
   1.64%  [kernel]  [k] ipv6_dev_get_saddr
   1.64%  [kernel]  [k] find_match
   1.51%  [kernel]  [k] l3mdev_master_ifindex_rcu
   1.24%  [kernel]  [k] ipv6_addr_label
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
...@@ -152,7 +152,7 @@ static void ipv6_regen_rndid(struct inet6_dev *idev); ...@@ -152,7 +152,7 @@ static void ipv6_regen_rndid(struct inet6_dev *idev);
static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
static int ipv6_count_addresses(struct inet6_dev *idev); static int ipv6_count_addresses(const struct inet6_dev *idev);
static int ipv6_generate_stable_address(struct in6_addr *addr, static int ipv6_generate_stable_address(struct in6_addr *addr,
u8 dad_count, u8 dad_count,
const struct inet6_dev *idev); const struct inet6_dev *idev);
...@@ -945,7 +945,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) ...@@ -945,7 +945,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
break; break;
} }
list_add_tail(&ifp->if_list, p); list_add_tail_rcu(&ifp->if_list, p);
} }
static u32 inet6_addr_hash(const struct in6_addr *addr) static u32 inet6_addr_hash(const struct in6_addr *addr)
...@@ -1204,7 +1204,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ...@@ -1204,7 +1204,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE)) if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE))
action = check_cleanup_prefix_route(ifp, &expires); action = check_cleanup_prefix_route(ifp, &expires);
list_del_init(&ifp->if_list); list_del_rcu(&ifp->if_list);
__in6_ifa_put(ifp); __in6_ifa_put(ifp);
write_unlock_bh(&ifp->idev->lock); write_unlock_bh(&ifp->idev->lock);
...@@ -1558,8 +1558,7 @@ static int __ipv6_dev_get_saddr(struct net *net, ...@@ -1558,8 +1558,7 @@ static int __ipv6_dev_get_saddr(struct net *net,
{ {
struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx]; struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
read_lock_bh(&idev->lock); list_for_each_entry_rcu(score->ifa, &idev->addr_list, if_list) {
list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
int i; int i;
/* /*
...@@ -1609,11 +1608,6 @@ static int __ipv6_dev_get_saddr(struct net *net, ...@@ -1609,11 +1608,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
} }
break; break;
} else if (minihiscore < miniscore) { } else if (minihiscore < miniscore) {
if (hiscore->ifa)
in6_ifa_put(hiscore->ifa);
in6_ifa_hold(score->ifa);
swap(hiscore, score); swap(hiscore, score);
hiscore_idx = 1 - hiscore_idx; hiscore_idx = 1 - hiscore_idx;
...@@ -1625,7 +1619,6 @@ static int __ipv6_dev_get_saddr(struct net *net, ...@@ -1625,7 +1619,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
} }
} }
out: out:
read_unlock_bh(&idev->lock);
return hiscore_idx; return hiscore_idx;
} }
...@@ -1662,6 +1655,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, ...@@ -1662,6 +1655,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
int dst_type; int dst_type;
bool use_oif_addr = false; bool use_oif_addr = false;
int hiscore_idx = 0; int hiscore_idx = 0;
int ret = 0;
dst_type = __ipv6_addr_type(daddr); dst_type = __ipv6_addr_type(daddr);
dst.addr = daddr; dst.addr = daddr;
...@@ -1737,15 +1731,14 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, ...@@ -1737,15 +1731,14 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
} }
out: out:
rcu_read_unlock();
hiscore = &scores[hiscore_idx]; hiscore = &scores[hiscore_idx];
if (!hiscore->ifa) if (!hiscore->ifa)
return -EADDRNOTAVAIL; ret = -EADDRNOTAVAIL;
else
*saddr = hiscore->ifa->addr;
*saddr = hiscore->ifa->addr; rcu_read_unlock();
in6_ifa_put(hiscore->ifa); return ret;
return 0;
} }
EXPORT_SYMBOL(ipv6_dev_get_saddr); EXPORT_SYMBOL(ipv6_dev_get_saddr);
...@@ -1785,15 +1778,15 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, ...@@ -1785,15 +1778,15 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
return err; return err;
} }
static int ipv6_count_addresses(struct inet6_dev *idev) static int ipv6_count_addresses(const struct inet6_dev *idev)
{ {
const struct inet6_ifaddr *ifp;
int cnt = 0; int cnt = 0;
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock); rcu_read_lock();
list_for_each_entry(ifp, &idev->addr_list, if_list) list_for_each_entry_rcu(ifp, &idev->addr_list, if_list)
cnt++; cnt++;
read_unlock_bh(&idev->lock); rcu_read_unlock();
return cnt; return cnt;
} }
...@@ -1859,20 +1852,18 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, ...@@ -1859,20 +1852,18 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
bool ipv6_chk_custom_prefix(const struct in6_addr *addr, bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
const unsigned int prefix_len, struct net_device *dev) const unsigned int prefix_len, struct net_device *dev)
{ {
struct inet6_dev *idev; const struct inet6_ifaddr *ifa;
struct inet6_ifaddr *ifa; const struct inet6_dev *idev;
bool ret = false; bool ret = false;
rcu_read_lock(); rcu_read_lock();
idev = __in6_dev_get(dev); idev = __in6_dev_get(dev);
if (idev) { if (idev) {
read_lock_bh(&idev->lock); list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
list_for_each_entry(ifa, &idev->addr_list, if_list) {
ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
if (ret) if (ret)
break; break;
} }
read_unlock_bh(&idev->lock);
} }
rcu_read_unlock(); rcu_read_unlock();
...@@ -1882,22 +1873,20 @@ EXPORT_SYMBOL(ipv6_chk_custom_prefix); ...@@ -1882,22 +1873,20 @@ EXPORT_SYMBOL(ipv6_chk_custom_prefix);
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
{ {
struct inet6_dev *idev; const struct inet6_ifaddr *ifa;
struct inet6_ifaddr *ifa; const struct inet6_dev *idev;
int onlink; int onlink;
onlink = 0; onlink = 0;
rcu_read_lock(); rcu_read_lock();
idev = __in6_dev_get(dev); idev = __in6_dev_get(dev);
if (idev) { if (idev) {
read_lock_bh(&idev->lock); list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
list_for_each_entry(ifa, &idev->addr_list, if_list) {
onlink = ipv6_prefix_equal(addr, &ifa->addr, onlink = ipv6_prefix_equal(addr, &ifa->addr,
ifa->prefix_len); ifa->prefix_len);
if (onlink) if (onlink)
break; break;
} }
read_unlock_bh(&idev->lock);
} }
rcu_read_unlock(); rcu_read_unlock();
return onlink; return onlink;
...@@ -3562,7 +3551,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) ...@@ -3562,7 +3551,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
struct net *net = dev_net(dev); struct net *net = dev_net(dev);
struct inet6_dev *idev; struct inet6_dev *idev;
struct inet6_ifaddr *ifa, *tmp; struct inet6_ifaddr *ifa, *tmp;
struct list_head del_list;
int _keep_addr; int _keep_addr;
bool keep_addr; bool keep_addr;
int state, i; int state, i;
...@@ -3654,7 +3642,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) ...@@ -3654,7 +3642,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
*/ */
keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6); keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
INIT_LIST_HEAD(&del_list);
list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
struct rt6_info *rt = NULL; struct rt6_info *rt = NULL;
bool keep; bool keep;
...@@ -3663,8 +3650,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) ...@@ -3663,8 +3650,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
!addr_is_local(&ifa->addr); !addr_is_local(&ifa->addr);
if (!keep)
list_move(&ifa->if_list, &del_list);
write_unlock_bh(&idev->lock); write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock); spin_lock_bh(&ifa->lock);
...@@ -3698,19 +3683,14 @@ static int addrconf_ifdown(struct net_device *dev, int how) ...@@ -3698,19 +3683,14 @@ static int addrconf_ifdown(struct net_device *dev, int how)
} }
write_lock_bh(&idev->lock); write_lock_bh(&idev->lock);
if (!keep) {
list_del_rcu(&ifa->if_list);
in6_ifa_put(ifa);
}
} }
write_unlock_bh(&idev->lock); write_unlock_bh(&idev->lock);
/* now clean up addresses to be removed */
while (!list_empty(&del_list)) {
ifa = list_first_entry(&del_list,
struct inet6_ifaddr, if_list);
list_del(&ifa->if_list);
in6_ifa_put(ifa);
}
/* Step 5: Discard anycast and multicast list */ /* Step 5: Discard anycast and multicast list */
if (how) { if (how) {
ipv6_ac_destroy_dev(idev); ipv6_ac_destroy_dev(idev);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册