提交 430bb1d4 编写于 作者: Y Yunsheng Lin 提交者: Yang Yingliang

net: hns3: add handling for xmit skb with recursive fraglist

mainline inclusion
from mainline-v5.13-rc1
commit d5d5e019
category: feature
bugzilla: NA
CVE: NA

----------------------------

Currently hns3 driver only handle the xmit skb with one level of
fraglist skb, add handling for multi level by calling hns3_tx_bd_num()
recursively when calculating bd num and calling hns3_fill_skb_to_desc()
recursively when filling tx desc.

When the skb has a fraglist level of 24, the skb is simply dropped and
stats.max_recursion_level is added to record the error. Move the stat
handling from hns3_nic_net_xmit() to hns3_nic_maybe_stop_tx() in order
to handle different error stat and add the 'max_recursion_level' and
'hw_limitation' stat.

Note that the max recursive level as 24 is chose according to below:
commit 48a1df65 ("skbuff: return -EMSGSIZE in skb_to_sgvec to
prevent overflow").

And that we are not able to find a testcase to verify the recursive
fraglist case, so Fixes tag is not provided.
Reported-by: NBarry Song <song.bao.hua@hisilicon.com>
Signed-off-by: NYunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: NHuazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
Signed-off-by: NYonglong Liu <liuyonglong@huawei.com>
Reviewed-by: Nli yongxin <liyongxin1@huawei.com>
Signed-off-by: NYang Yingliang <yangyingliang@huawei.com>
上级 82a71589
......@@ -1185,22 +1185,22 @@ static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size,
return bd_num;
}
static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size)
static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size,
unsigned int bd_num,
unsigned int recursion_level)
{
#define HNS3_MAX_RECURSION_LEVEL 24
struct sk_buff *frag_skb;
unsigned int bd_num = 0;
/* If the total len is within the max bd limit */
if (likely(skb->len <= HNS3_MAX_BD_SIZE && !skb_has_frag_list(skb) &&
if (likely(skb->len <= HNS3_MAX_BD_SIZE && !recursion_level &&
!skb_has_frag_list(skb) &&
skb_shinfo(skb)->nr_frags < HNS3_MAX_NON_TSO_BD_NUM))
return skb_shinfo(skb)->nr_frags + 1U;
/* The below case will always be linearized, return
* HNS3_MAX_BD_NUM_TSO + 1U to make sure it is linearized.
*/
if (unlikely(skb->len > HNS3_MAX_TSO_SIZE ||
(!skb_is_gso(skb) && skb->len > HNS3_MAX_NON_TSO_SIZE)))
return HNS3_MAX_TSO_BD_NUM + 1U;
if (unlikely(recursion_level >= HNS3_MAX_RECURSION_LEVEL))
return UINT_MAX;
bd_num = hns3_skb_bd_num(skb, bd_size, bd_num);
......@@ -1208,7 +1208,8 @@ static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size)
return bd_num;
skb_walk_frags(skb, frag_skb) {
bd_num = hns3_skb_bd_num(frag_skb, bd_size, bd_num);
bd_num = hns3_tx_bd_num(frag_skb, bd_size, bd_num,
recursion_level + 1);
if (bd_num > HNS3_MAX_TSO_BD_NUM)
return bd_num;
}
......@@ -1265,6 +1266,40 @@ void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size)
size[i] = skb_frag_size(&shinfo->frags[i]);
}
static int hns3_skb_linearize(struct hns3_enet_ring *ring, struct sk_buff *skb,
unsigned int bd_num)
{
/* 'bd_num == UINT_MAX' means the skb' fraglist has a
* recursion level of over HNS3_MAX_RECURSION_LEVEL.
*/
if (bd_num == UINT_MAX) {
u64_stats_update_begin(&ring->syncp);
ring->stats.over_max_recursion++;
u64_stats_update_end(&ring->syncp);
return -ENOMEM;
}
/* The skb->len has exceeded the hw limitation, linearization
* will not help.
*/
if (skb->len > HNS3_MAX_TSO_SIZE ||
(!skb_is_gso(skb) && skb->len > HNS3_MAX_NON_TSO_SIZE)) {
u64_stats_update_begin(&ring->syncp);
ring->stats.hw_limitation++;
u64_stats_update_end(&ring->syncp);
return -ENOMEM;
}
if (__skb_linearize(skb)) {
u64_stats_update_begin(&ring->syncp);
ring->stats.sw_err_cnt++;
u64_stats_update_end(&ring->syncp);
return -ENOMEM;
}
return 0;
}
static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
struct net_device *netdev,
struct sk_buff *skb)
......@@ -1273,7 +1308,7 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
unsigned int bd_size[HNS3_MAX_TSO_BD_NUM + 1U];
unsigned int bd_num;
bd_num = hns3_tx_bd_num(skb, bd_size);
bd_num = hns3_tx_bd_num(skb, bd_size, 0, 0);
if (unlikely(bd_num > HNS3_MAX_NON_TSO_BD_NUM)) {
if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) &&
!hns3_skb_need_linearized(skb, bd_size, bd_num)) {
......@@ -1281,16 +1316,10 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
goto out;
}
if (__skb_linearize(skb))
if (hns3_skb_linearize(ring, skb, bd_num))
return -ENOMEM;
bd_num = hns3_tx_bd_count(skb->len);
if ((skb_is_gso(skb) && bd_num > HNS3_MAX_TSO_BD_NUM) ||
(!skb_is_gso(skb) &&
bd_num > HNS3_MAX_NON_TSO_BD_NUM)) {
trace_hns3_over_8bd(skb);
return -ENOMEM;
}
u64_stats_update_begin(&ring->syncp);
ring->stats.tx_copy++;
......@@ -1314,6 +1343,10 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
return bd_num;
}
u64_stats_update_begin(&ring->syncp);
ring->stats.tx_busy++;
u64_stats_update_end(&ring->syncp);
return -EBUSY;
}
......@@ -1361,6 +1394,7 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
struct sk_buff *skb, enum hns_desc_type type)
{
unsigned int size = skb_headlen(skb);
struct sk_buff *frag_skb;
int i, ret, bd_num = 0;
if (size) {
......@@ -1385,6 +1419,15 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
bd_num += ret;
}
skb_walk_frags(skb, frag_skb) {
ret = hns3_fill_skb_to_desc(ring, frag_skb,
DESC_TYPE_FRAGLIST_SKB);
if (unlikely(ret < 0))
return ret;
bd_num += ret;
}
return bd_num;
}
......@@ -1415,8 +1458,6 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
struct hns3_enet_ring *ring = &priv->ring[skb->queue_mapping];
struct netdev_queue *dev_queue;
int pre_ntu, next_to_use_head;
struct sk_buff *frag_skb;
int bd_num = 0;
int ret;
/* Hardware can only handle short frames above 32 bytes */
......@@ -1431,15 +1472,8 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
ret = hns3_nic_maybe_stop_tx(ring, netdev, skb);
if (unlikely(ret <= 0)) {
if (ret == -EBUSY) {
u64_stats_update_begin(&ring->syncp);
ring->stats.tx_busy++;
u64_stats_update_end(&ring->syncp);
hns3_tx_doorbell(ring, 0, true);
return NETDEV_TX_BUSY;
} else if (ret == -ENOMEM) {
u64_stats_update_begin(&ring->syncp);
ring->stats.sw_err_cnt++;
u64_stats_update_end(&ring->syncp);
}
hns3_rl_err(netdev, "xmit error: %d!\n", ret);
......@@ -1452,24 +1486,14 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
if (unlikely(ret < 0))
goto fill_err;
/* 'ret < 0' means filling error, 'ret == 0' means skb->len is
* zero, which is unlikely, and 'ret > 0' means how many tx desc
* need to be notified to the hw.
*/
ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);
if (unlikely(ret < 0))
goto fill_err;
bd_num += ret;
if (!skb_has_frag_list(skb))
goto out;
skb_walk_frags(skb, frag_skb) {
ret = hns3_fill_skb_to_desc(ring, frag_skb,
DESC_TYPE_FRAGLIST_SKB);
if (unlikely(ret < 0))
if (unlikely(ret <= 0))
goto fill_err;
bd_num += ret;
}
out:
pre_ntu = ring->next_to_use ? (ring->next_to_use - 1) :
(ring->desc_num - 1);
ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |=
......@@ -1479,11 +1503,10 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
dev_queue = netdev_get_tx_queue(netdev, ring->queue_index);
if (!netdev_xmit_more()) {
netdev_tx_sent_queue(dev_queue, skb->len);
hns3_tx_doorbell(ring, bd_num, true);
hns3_tx_doorbell(ring, ret, true);
} else {
dql_queued(&dev_queue->dql, skb->len);
hns3_tx_doorbell(ring, bd_num,
netif_tx_queue_stopped(dev_queue));
hns3_tx_doorbell(ring, ret, netif_tx_queue_stopped(dev_queue));
}
return NETDEV_TX_OK;
......@@ -1671,11 +1694,15 @@ static struct rtnl_link_stats64 *hns3_nic_get_stats64(struct net_device *netdev,
tx_drop += ring->stats.tx_l4_proto_err;
tx_drop += ring->stats.tx_l2l3l4_err;
tx_drop += ring->stats.tx_tso_err;
tx_drop += ring->stats.over_max_recursion;
tx_drop += ring->stats.hw_limitation;
tx_errors += ring->stats.sw_err_cnt;
tx_errors += ring->stats.tx_vlan_err;
tx_errors += ring->stats.tx_l4_proto_err;
tx_errors += ring->stats.tx_l2l3l4_err;
tx_errors += ring->stats.tx_tso_err;
tx_errors += ring->stats.over_max_recursion;
tx_errors += ring->stats.hw_limitation;
} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
/* fetch the rx stats */
......
......@@ -384,6 +384,8 @@ struct ring_stats {
u64 tx_l4_proto_err;
u64 tx_l2l3l4_err;
u64 tx_tso_err;
u64 over_max_recursion;
u64 hw_limitation;
};
struct {
u64 rx_pkts;
......
......@@ -43,6 +43,8 @@ static const struct hns3_stats hns3_txq_stats[] = {
HNS3_TQP_STAT("l4_proto_err", tx_l4_proto_err),
HNS3_TQP_STAT("l2l3l4_err", tx_l2l3l4_err),
HNS3_TQP_STAT("tso_err", tx_tso_err),
HNS3_TQP_STAT("over_max_recursion", over_max_recursion),
HNS3_TQP_STAT("hw_limitation", hw_limitation),
};
#define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册