提交 0fb8d5a0 编写于 作者: D David S. Miller

Merge branch 'vxlan-fix-default-fdb-entry-user-space-notify-ordering-race'

Roopa Prabhu says:

====================
vxlan: fix default fdb entry user-space notify ordering/race

Problem:
In vxlan_newlink, a default fdb entry is added before register_netdev.
The default fdb creation function notifies user-space of the
fdb entry on the vxlan device which user-space does not know about yet.
(RTM_NEWNEIGH goes before RTM_NEWLINK for the same ifindex).

This series fixes the user-space netlink notification ordering issue
with the following changes:
- decouple fdb notify from fdb create.
- Move fdb notify after register_netdev.
- modify rtnl_configure_link to allow configuring a link early.
- Call rtnl_configure_link in vxlan newlink handler to notify
userspace about the newlink before fdb notify and
hence avoiding the user-space race.
====================

Fixes: afbd8bae ("vxlan: add implicit fdb entry for default destination")
Signed-off-by: NRoopa Prabhu <roopa@cumulusnetworks.com>
......@@ -636,8 +636,61 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
}
/* Add new entry to forwarding table -- assumes lock held */
static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
const u8 *mac, __u16 state,
__be32 src_vni, __u8 ndm_flags)
{
struct vxlan_fdb *f;
f = kmalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
return NULL;
f->state = state;
f->flags = ndm_flags;
f->updated = f->used = jiffies;
f->vni = src_vni;
INIT_LIST_HEAD(&f->remotes);
memcpy(f->eth_addr, mac, ETH_ALEN);
return f;
}
static int vxlan_fdb_create(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
__u16 state, __be16 port, __be32 src_vni,
__be32 vni, __u32 ifindex, __u8 ndm_flags,
struct vxlan_fdb **fdb)
{
struct vxlan_rdst *rd = NULL;
struct vxlan_fdb *f;
int rc;
if (vxlan->cfg.addrmax &&
vxlan->addrcnt >= vxlan->cfg.addrmax)
return -ENOSPC;
netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
if (!f)
return -ENOMEM;
rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
if (rc < 0) {
kfree(f);
return rc;
}
++vxlan->addrcnt;
hlist_add_head_rcu(&f->hlist,
vxlan_fdb_head(vxlan, mac, src_vni));
*fdb = f;
return 0;
}
/* Add new entry to forwarding table -- assumes lock held */
static int vxlan_fdb_update(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
__u16 state, __u16 flags,
__be16 port, __be32 src_vni, __be32 vni,
......@@ -687,37 +740,17 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
if (!(flags & NLM_F_CREATE))
return -ENOENT;
if (vxlan->cfg.addrmax &&
vxlan->addrcnt >= vxlan->cfg.addrmax)
return -ENOSPC;
/* Disallow replace to add a multicast entry */
if ((flags & NLM_F_REPLACE) &&
(is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
return -EOPNOTSUPP;
netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
f = kmalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
return -ENOMEM;
notify = 1;
f->state = state;
f->flags = ndm_flags;
f->updated = f->used = jiffies;
f->vni = src_vni;
INIT_LIST_HEAD(&f->remotes);
memcpy(f->eth_addr, mac, ETH_ALEN);
rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
if (rc < 0) {
kfree(f);
rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
vni, ifindex, ndm_flags, &f);
if (rc < 0)
return rc;
}
++vxlan->addrcnt;
hlist_add_head_rcu(&f->hlist,
vxlan_fdb_head(vxlan, mac, src_vni));
notify = 1;
}
if (notify) {
......@@ -741,13 +774,15 @@ static void vxlan_fdb_free(struct rcu_head *head)
kfree(f);
}
static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
bool do_notify)
{
netdev_dbg(vxlan->dev,
"delete %pM\n", f->eth_addr);
--vxlan->addrcnt;
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
if (do_notify)
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
hlist_del_rcu(&f->hlist);
call_rcu(&f->rcu, vxlan_fdb_free);
......@@ -863,7 +898,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
return -EAFNOSUPPORT;
spin_lock_bh(&vxlan->hash_lock);
err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
port, src_vni, vni, ifindex, ndm->ndm_flags);
spin_unlock_bh(&vxlan->hash_lock);
......@@ -897,7 +932,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
goto out;
}
vxlan_fdb_destroy(vxlan, f);
vxlan_fdb_destroy(vxlan, f, true);
out:
return 0;
......@@ -1006,7 +1041,7 @@ static bool vxlan_snoop(struct net_device *dev,
/* close off race between vxlan_flush and incoming packets */
if (netif_running(dev))
vxlan_fdb_create(vxlan, src_mac, src_ip,
vxlan_fdb_update(vxlan, src_mac, src_ip,
NUD_REACHABLE,
NLM_F_EXCL|NLM_F_CREATE,
vxlan->cfg.dst_port,
......@@ -2364,7 +2399,7 @@ static void vxlan_cleanup(struct timer_list *t)
"garbage collect %pM\n",
f->eth_addr);
f->state = NUD_STALE;
vxlan_fdb_destroy(vxlan, f);
vxlan_fdb_destroy(vxlan, f, true);
} else if (time_before(timeout, next_timer))
next_timer = timeout;
}
......@@ -2415,7 +2450,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
spin_lock_bh(&vxlan->hash_lock);
f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
if (f)
vxlan_fdb_destroy(vxlan, f);
vxlan_fdb_destroy(vxlan, f, true);
spin_unlock_bh(&vxlan->hash_lock);
}
......@@ -2469,7 +2504,7 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
continue;
/* the all_zeros_mac entry is deleted at vxlan_uninit */
if (!is_zero_ether_addr(f->eth_addr))
vxlan_fdb_destroy(vxlan, f);
vxlan_fdb_destroy(vxlan, f, true);
}
}
spin_unlock_bh(&vxlan->hash_lock);
......@@ -3160,6 +3195,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f = NULL;
int err;
err = vxlan_dev_configure(net, dev, conf, false, extack);
......@@ -3173,24 +3209,35 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
err = vxlan_fdb_create(vxlan, all_zeros_mac,
&vxlan->default_dst.remote_ip,
NUD_REACHABLE | NUD_PERMANENT,
NLM_F_EXCL | NLM_F_CREATE,
vxlan->cfg.dst_port,
vxlan->default_dst.remote_vni,
vxlan->default_dst.remote_vni,
vxlan->default_dst.remote_ifindex,
NTF_SELF);
NTF_SELF, &f);
if (err)
return err;
}
err = register_netdevice(dev);
if (err)
goto errout;
err = rtnl_configure_link(dev, NULL);
if (err) {
vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
return err;
unregister_netdevice(dev);
goto errout;
}
/* notify default fdb entry */
if (f)
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
list_add(&vxlan->next, &vn->vxlan_list);
return 0;
errout:
if (f)
vxlan_fdb_destroy(vxlan, f, false);
return err;
}
static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
......@@ -3425,6 +3472,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
struct vxlan_rdst *dst = &vxlan->default_dst;
struct vxlan_rdst old_dst;
struct vxlan_config conf;
struct vxlan_fdb *f = NULL;
int err;
err = vxlan_nl2conf(tb, data,
......@@ -3453,16 +3501,16 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
err = vxlan_fdb_create(vxlan, all_zeros_mac,
&dst->remote_ip,
NUD_REACHABLE | NUD_PERMANENT,
NLM_F_CREATE | NLM_F_APPEND,
vxlan->cfg.dst_port,
dst->remote_vni,
dst->remote_vni,
dst->remote_ifindex,
NTF_SELF);
NTF_SELF, &f);
if (err) {
spin_unlock_bh(&vxlan->hash_lock);
return err;
}
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
}
spin_unlock_bh(&vxlan->hash_lock);
}
......
......@@ -2759,9 +2759,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
return err;
}
dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
__dev_notify_flags(dev, old_flags, ~0U);
if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
__dev_notify_flags(dev, old_flags, 0U);
} else {
dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
__dev_notify_flags(dev, old_flags, ~0U);
}
return 0;
}
EXPORT_SYMBOL(rtnl_configure_link);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册