提交 bee3c3c9 编写于 作者: M Moni Shoua 提交者: Doug Ledford

IB/cma: Join and leave multicast groups with IGMP

Since RoCEv2 is a protocol that runs over an IP header, IGMP join and
leave requests must be sent to the network when joining and leaving
multicast groups.
Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
上级 25f40220
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include <linux/in6.h> #include <linux/in6.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/igmp.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/inetdevice.h> #include <linux/inetdevice.h>
#include <linux/slab.h> #include <linux/slab.h>
...@@ -304,6 +305,7 @@ struct cma_multicast { ...@@ -304,6 +305,7 @@ struct cma_multicast {
void *context; void *context;
struct sockaddr_storage addr; struct sockaddr_storage addr;
struct kref mcref; struct kref mcref;
bool igmp_joined;
}; };
struct cma_work { struct cma_work {
...@@ -400,6 +402,26 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) ...@@ -400,6 +402,26 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
} }
/*
 * cma_igmp_send - add or drop an IPv4 multicast group on a net device.
 * @ndev: net device to operate on; may be NULL.
 * @mgid: multicast GID; the group address is the __be32 read from
 *        offset 12 of mgid->raw.
 * @join: true calls ip_mc_inc_group(), false calls ip_mc_dec_group().
 *
 * The in_device lookup and the group update are both done with the RTNL
 * lock held.
 *
 * Returns 0 when @ndev has an in_device, -ENODEV otherwise (including
 * when @ndev is NULL).
 */
static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
	struct in_device *in_dev;
	__be32 group;

	/* No device: nothing to look up, so the lock is never taken. */
	if (!ndev)
		return -ENODEV;

	rtnl_lock();
	in_dev = __in_dev_get_rtnl(ndev);
	if (in_dev) {
		group = *(__be32 *)(mgid->raw + 12);
		if (join)
			ip_mc_inc_group(in_dev, group);
		else
			ip_mc_dec_group(in_dev, group);
	}
	rtnl_unlock();

	return in_dev ? 0 : -ENODEV;
}
static void _cma_attach_to_dev(struct rdma_id_private *id_priv, static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev) struct cma_device *cma_dev)
{ {
...@@ -1532,8 +1554,24 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) ...@@ -1532,8 +1554,24 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
id_priv->id.port_num)) { id_priv->id.port_num)) {
ib_sa_free_multicast(mc->multicast.ib); ib_sa_free_multicast(mc->multicast.ib);
kfree(mc); kfree(mc);
} else } else {
if (mc->igmp_joined) {
struct rdma_dev_addr *dev_addr =
&id_priv->id.route.addr.dev_addr;
struct net_device *ndev = NULL;
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(&init_net,
dev_addr->bound_dev_if);
if (ndev) {
cma_igmp_send(ndev,
&mc->multicast.ib->rec.mgid,
false);
dev_put(ndev);
}
}
kref_put(&mc->mcref, release_mc); kref_put(&mc->mcref, release_mc);
}
} }
} }
...@@ -3645,12 +3683,23 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) ...@@ -3645,12 +3683,23 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
event.status = status; event.status = status;
event.param.ud.private_data = mc->context; event.param.ud.private_data = mc->context;
if (!status) { if (!status) {
struct rdma_dev_addr *dev_addr =
&id_priv->id.route.addr.dev_addr;
struct net_device *ndev =
dev_get_by_index(&init_net, dev_addr->bound_dev_if);
enum ib_gid_type gid_type =
id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
event.event = RDMA_CM_EVENT_MULTICAST_JOIN; event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
ib_init_ah_from_mcmember(id_priv->id.device, ib_init_ah_from_mcmember(id_priv->id.device,
id_priv->id.port_num, &multicast->rec, id_priv->id.port_num, &multicast->rec,
ndev, gid_type,
&event.param.ud.ah_attr); &event.param.ud.ah_attr);
event.param.ud.qp_num = 0xFFFFFF; event.param.ud.qp_num = 0xFFFFFF;
event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
if (ndev)
dev_put(ndev);
} else } else
event.event = RDMA_CM_EVENT_MULTICAST_ERROR; event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
...@@ -3783,9 +3832,10 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, ...@@ -3783,9 +3832,10 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
{ {
struct iboe_mcast_work *work; struct iboe_mcast_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int err; int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL; struct net_device *ndev = NULL;
enum ib_gid_type gid_type;
if (cma_zero_addr((struct sockaddr *)&mc->addr)) if (cma_zero_addr((struct sockaddr *)&mc->addr))
return -EINVAL; return -EINVAL;
...@@ -3815,9 +3865,25 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, ...@@ -3815,9 +3865,25 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
mc->multicast.ib->rec.rate = iboe_get_rate(ndev); mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
mc->multicast.ib->rec.hop_limit = 1; mc->multicast.ib->rec.hop_limit = 1;
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu); mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
true);
if (!err) {
mc->igmp_joined = true;
mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
}
} else {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
err = -ENOTSUPP;
}
dev_put(ndev); dev_put(ndev);
if (!mc->multicast.ib->rec.mtu) { if (err || !mc->multicast.ib->rec.mtu) {
err = -EINVAL; if (!err)
err = -EINVAL;
goto out2; goto out2;
} }
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
...@@ -3856,7 +3922,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, ...@@ -3856,7 +3922,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
memcpy(&mc->addr, addr, rdma_addr_size(addr)); memcpy(&mc->addr, addr, rdma_addr_size(addr));
mc->context = context; mc->context = context;
mc->id_priv = id_priv; mc->id_priv = id_priv;
mc->igmp_joined = false;
spin_lock(&id_priv->lock); spin_lock(&id_priv->lock);
list_add(&mc->list, &id_priv->mc_list); list_add(&mc->list, &id_priv->mc_list);
spin_unlock(&id_priv->lock); spin_unlock(&id_priv->lock);
...@@ -3901,9 +3967,25 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) ...@@ -3901,9 +3967,25 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
if (rdma_cap_ib_mcast(id->device, id->port_num)) { if (rdma_cap_ib_mcast(id->device, id->port_num)) {
ib_sa_free_multicast(mc->multicast.ib); ib_sa_free_multicast(mc->multicast.ib);
kfree(mc); kfree(mc);
} else if (rdma_protocol_roce(id->device, id->port_num)) } else if (rdma_protocol_roce(id->device, id->port_num)) {
if (mc->igmp_joined) {
struct rdma_dev_addr *dev_addr =
&id->route.addr.dev_addr;
struct net_device *ndev = NULL;
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(&init_net,
dev_addr->bound_dev_if);
if (ndev) {
cma_igmp_send(ndev,
&mc->multicast.ib->rec.mgid,
false);
dev_put(ndev);
}
mc->igmp_joined = false;
}
kref_put(&mc->mcref, release_mc); kref_put(&mc->mcref, release_mc);
}
return; return;
} }
} }
......
...@@ -723,14 +723,27 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec); ...@@ -723,14 +723,27 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec, struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
struct ib_ah_attr *ah_attr) struct ib_ah_attr *ah_attr)
{ {
int ret; int ret;
u16 gid_index; u16 gid_index;
u8 p; u8 p;
ret = ib_find_cached_gid(device, &rec->port_gid, IB_GID_TYPE_IB, if (rdma_protocol_roce(device, port_num)) {
NULL, &p, &gid_index); ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
gid_type, port_num,
ndev,
&gid_index);
} else if (rdma_protocol_ib(device, port_num)) {
ret = ib_find_cached_gid(device, &rec->port_gid,
IB_GID_TYPE_IB, NULL, &p,
&gid_index);
} else {
ret = -EINVAL;
}
if (ret) if (ret)
return ret; return ret;
......
...@@ -403,6 +403,8 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, ...@@ -403,6 +403,8 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
*/ */
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec, struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
struct ib_ah_attr *ah_attr); struct ib_ah_attr *ah_attr);
/** /**
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册