提交 d07418af 编写于 作者: E Eric Dumazet 提交者: Jakub Kicinski

ipv4: avoid quadratic behavior in netns dismantle

net/ipv4/fib_semantics.c uses a hash table of 256 slots,
keyed by device ifindexes: fib_info_devhash[DEVINDEX_HASHSIZE]

The problem is that with network namespaces, devices tend
to use the same ifindex.

lo device for instance has a fixed ifindex of one,
for all network namespaces.

This means that hosts with thousands of netns spend
a lot of time looking at some hash buckets with thousands
of elements, notably at netns dismantle.

Simply add a per netns perturbation (net_hash_mix())
to spread elements more uniformly.

Also change fib_devindex_hashfn() to use more entropy.

Fixes: aa79e66e ("net: Make ifindex generation per-net namespace")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
上级 8eb896a7
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/netlink.h> #include <linux/netlink.h>
#include <linux/hash.h>
#include <net/arp.h> #include <net/arp.h>
#include <net/ip.h> #include <net/ip.h>
...@@ -319,11 +320,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) ...@@ -319,11 +320,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
static inline unsigned int fib_devindex_hashfn(unsigned int val) static inline unsigned int fib_devindex_hashfn(unsigned int val)
{ {
unsigned int mask = DEVINDEX_HASHSIZE - 1; return hash_32(val, DEVINDEX_HASHBITS);
}
static struct hlist_head *
fib_info_devhash_bucket(const struct net_device *dev)
{
u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
return (val ^ return &fib_info_devhash[fib_devindex_hashfn(val)];
(val >> DEVINDEX_HASHBITS) ^
(val >> (DEVINDEX_HASHBITS * 2))) & mask;
} }
static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
...@@ -433,12 +438,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) ...@@ -433,12 +438,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
{ {
struct hlist_head *head; struct hlist_head *head;
struct fib_nh *nh; struct fib_nh *nh;
unsigned int hash;
spin_lock(&fib_info_lock); spin_lock(&fib_info_lock);
hash = fib_devindex_hashfn(dev->ifindex); head = fib_info_devhash_bucket(dev);
head = &fib_info_devhash[hash];
hlist_for_each_entry(nh, head, nh_hash) { hlist_for_each_entry(nh, head, nh_hash) {
if (nh->fib_nh_dev == dev && if (nh->fib_nh_dev == dev &&
nh->fib_nh_gw4 == gw && nh->fib_nh_gw4 == gw &&
...@@ -1609,12 +1613,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg, ...@@ -1609,12 +1613,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
} else { } else {
change_nexthops(fi) { change_nexthops(fi) {
struct hlist_head *head; struct hlist_head *head;
unsigned int hash;
if (!nexthop_nh->fib_nh_dev) if (!nexthop_nh->fib_nh_dev)
continue; continue;
hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
head = &fib_info_devhash[hash];
hlist_add_head(&nexthop_nh->nh_hash, head); hlist_add_head(&nexthop_nh->nh_hash, head);
} endfor_nexthops(fi) } endfor_nexthops(fi)
} }
...@@ -1966,8 +1968,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) ...@@ -1966,8 +1968,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{ {
unsigned int hash = fib_devindex_hashfn(dev->ifindex); struct hlist_head *head = fib_info_devhash_bucket(dev);
struct hlist_head *head = &fib_info_devhash[hash];
struct fib_nh *nh; struct fib_nh *nh;
hlist_for_each_entry(nh, head, nh_hash) { hlist_for_each_entry(nh, head, nh_hash) {
...@@ -1986,12 +1987,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) ...@@ -1986,12 +1987,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
*/ */
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{ {
int ret = 0; struct hlist_head *head = fib_info_devhash_bucket(dev);
int scope = RT_SCOPE_NOWHERE;
struct fib_info *prev_fi = NULL; struct fib_info *prev_fi = NULL;
unsigned int hash = fib_devindex_hashfn(dev->ifindex); int scope = RT_SCOPE_NOWHERE;
struct hlist_head *head = &fib_info_devhash[hash];
struct fib_nh *nh; struct fib_nh *nh;
int ret = 0;
if (force) if (force)
scope = -1; scope = -1;
...@@ -2136,7 +2136,6 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) ...@@ -2136,7 +2136,6 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
int fib_sync_up(struct net_device *dev, unsigned char nh_flags) int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{ {
struct fib_info *prev_fi; struct fib_info *prev_fi;
unsigned int hash;
struct hlist_head *head; struct hlist_head *head;
struct fib_nh *nh; struct fib_nh *nh;
int ret; int ret;
...@@ -2152,8 +2151,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) ...@@ -2152,8 +2151,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
} }
prev_fi = NULL; prev_fi = NULL;
hash = fib_devindex_hashfn(dev->ifindex); head = fib_info_devhash_bucket(dev);
head = &fib_info_devhash[hash];
ret = 0; ret = 0;
hlist_for_each_entry(nh, head, nh_hash) { hlist_for_each_entry(nh, head, nh_hash) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册