提交 78a50a5e 编写于 作者: H Hans Westgaard Ry 提交者: Doug Ledford

IB/ipoib: Add handling for sending of skb with many frags

IPoIB converts skb-fragments to sge adding 1 extra sge when SG is enabled.
Current codepath assumes that the max number of sge a device support
is at least MAX_SKB_FRAGS+1, there is no interaction with upper layers
to limit number of fragments in an skb if a device suports fewer
sges. The assumptions also lead to requesting a fixed number of sge
when IPoIB creates queue-pairs with SG enabled.

A fallback/slowpath is implemented using skb_linearize to
handle cases where the conversion would result in more sges than supported.
Signed-off-by: NHans Westgaard Ry <hans.westgaard.ry@oracle.com>
Reviewed-by: NHåkon Bugge <haakon.bugge@oracle.com>
Reviewed-by: NWei Lin Guay <wei.lin.guay@oracle.com>
Reviewed-by: NYuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: NDoug Ledford <dledford@redhat.com>
上级 fc77dbd3
...@@ -244,6 +244,7 @@ struct ipoib_cm_tx { ...@@ -244,6 +244,7 @@ struct ipoib_cm_tx {
unsigned tx_tail; unsigned tx_tail;
unsigned long flags; unsigned long flags;
u32 mtu; u32 mtu;
unsigned max_send_sge;
}; };
struct ipoib_cm_rx_buf { struct ipoib_cm_rx_buf {
...@@ -390,6 +391,7 @@ struct ipoib_dev_priv { ...@@ -390,6 +391,7 @@ struct ipoib_dev_priv {
int hca_caps; int hca_caps;
struct ipoib_ethtool_st ethtool; struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer; struct timer_list poll_timer;
unsigned max_send_sge;
}; };
struct ipoib_ah { struct ipoib_ah {
......
...@@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ ...@@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req; struct ipoib_tx_buf *tx_req;
int rc; int rc;
unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
if (unlikely(skb->len > tx->mtu)) { if (unlikely(skb->len > tx->mtu)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
...@@ -719,7 +720,23 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ ...@@ -719,7 +720,23 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN); ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
return; return;
} }
if (skb_shinfo(skb)->nr_frags > usable_sge) {
if (skb_linearize(skb) < 0) {
ipoib_warn(priv, "skb could not be linearized\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
/* Does skb_linearize return ok without reducing nr_frags? */
if (skb_shinfo(skb)->nr_frags > usable_sge) {
ipoib_warn(priv, "too many frags after skb linearize\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
}
ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n", ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
tx->tx_head, skb->len, tx->qp->qp_num); tx->tx_head, skb->len, tx->qp->qp_num);
...@@ -1031,7 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ ...@@ -1031,7 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
struct ib_qp *tx_qp; struct ib_qp *tx_qp;
if (dev->features & NETIF_F_SG) if (dev->features & NETIF_F_SG)
attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; attr.cap.max_send_sge =
min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
tx_qp = ib_create_qp(priv->pd, &attr); tx_qp = ib_create_qp(priv->pd, &attr);
if (PTR_ERR(tx_qp) == -EINVAL) { if (PTR_ERR(tx_qp) == -EINVAL) {
...@@ -1040,6 +1058,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ ...@@ -1040,6 +1058,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
tx_qp = ib_create_qp(priv->pd, &attr); tx_qp = ib_create_qp(priv->pd, &attr);
} }
tx->max_send_sge = attr.cap.max_send_sge;
return tx_qp; return tx_qp;
} }
......
...@@ -538,6 +538,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ...@@ -538,6 +538,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_tx_buf *tx_req; struct ipoib_tx_buf *tx_req;
int hlen, rc; int hlen, rc;
void *phead; void *phead;
unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb);
if (skb_is_gso(skb)) { if (skb_is_gso(skb)) {
hlen = skb_transport_offset(skb) + tcp_hdrlen(skb); hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
...@@ -561,6 +562,23 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, ...@@ -561,6 +562,23 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
phead = NULL; phead = NULL;
hlen = 0; hlen = 0;
} }
if (skb_shinfo(skb)->nr_frags > usable_sge) {
if (skb_linearize(skb) < 0) {
ipoib_warn(priv, "skb could not be linearized\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
/* Does skb_linearize return ok without reducing nr_frags? */
if (skb_shinfo(skb)->nr_frags > usable_sge) {
ipoib_warn(priv, "too many frags after skb linearize\n");
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
}
ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n", ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
skb->len, address, qpn); skb->len, address, qpn);
......
...@@ -206,7 +206,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) ...@@ -206,7 +206,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
if (dev->features & NETIF_F_SG) if (dev->features & NETIF_F_SG)
init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; init_attr.cap.max_send_sge =
min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
priv->qp = ib_create_qp(priv->pd, &init_attr); priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) { if (IS_ERR(priv->qp)) {
...@@ -233,6 +234,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) ...@@ -233,6 +234,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->rx_wr.next = NULL; priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge; priv->rx_wr.sg_list = priv->rx_sge;
priv->max_send_sge = init_attr.cap.max_send_sge;
return 0; return 0;
out_free_send_cq: out_free_send_cq:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册