提交 08b8cef4 编写于 作者: Y Yunsheng Lin 提交者: Zheng Zengkai

net: hns3: use bounce buffer when rx page can not be reused

mainline inclusion
from mainline-master
commit 99f6b5fb
category: feature
bugzilla: 173966
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=99f6b5fb5f63cf69c6e56bba8e5492c98c521a63

----------------------------------------------------------------------

Currently an rx page is reused to receive future packets when
the stack releases the previous skb quickly. If the old page
cannot be reused, a new page is allocated and mapped,
which consumes a lot of CPU time when the IOMMU is in strict mode,
especially when the application and irq/NAPI happen to run on
the same CPU.

So allocate a new frag and memcpy the data into it to avoid the
costly IOMMU unmapping/mapping operation, and add "frag_alloc_err"
and "frag_alloc" stats to the output of the "ethtool -S ethX" command.

The throughput improves by more than 50% when running a single thread
of iperf using TCP when the IOMMU is in strict mode and iperf shares the
same CPU with irq/NAPI (rx_copybreak = 2048 and mtu = 1500).
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Yongxin Li <liyongxin1@huawei.com>
Signed-off-by: Junxin Chen <chenjunxin1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
上级 f5abfb4b
...@@ -450,6 +450,7 @@ static const struct hns3_dbg_item rx_queue_info_items[] = { ...@@ -450,6 +450,7 @@ static const struct hns3_dbg_item rx_queue_info_items[] = {
{ "HEAD", 2 }, { "HEAD", 2 },
{ "FBDNUM", 2 }, { "FBDNUM", 2 },
{ "PKTNUM", 2 }, { "PKTNUM", 2 },
{ "COPYBREAK", 2 },
{ "RING_EN", 2 }, { "RING_EN", 2 },
{ "RX_RING_EN", 2 }, { "RX_RING_EN", 2 },
{ "BASE_ADDR", 10 }, { "BASE_ADDR", 10 },
...@@ -481,6 +482,7 @@ static void hns3_dump_rx_queue_info(struct hns3_enet_ring *ring, ...@@ -481,6 +482,7 @@ static void hns3_dump_rx_queue_info(struct hns3_enet_ring *ring,
sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
HNS3_RING_RX_RING_PKTNUM_RECORD_REG)); HNS3_RING_RX_RING_PKTNUM_RECORD_REG));
sprintf(result[j++], "%9u", ring->rx_copybreak);
sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base + sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base +
HNS3_RING_EN_REG) ? "on" : "off"); HNS3_RING_EN_REG) ? "on" : "off");
......
...@@ -3552,6 +3552,28 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, ...@@ -3552,6 +3552,28 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
hns3_page_size(ring)) { hns3_page_size(ring)) {
desc_cb->page_offset += truesize; desc_cb->page_offset += truesize;
desc_cb->reuse_flag = 1; desc_cb->reuse_flag = 1;
} else if (frag_size <= ring->rx_copybreak) {
void *frag = napi_alloc_frag(frag_size);
if (unlikely(!frag)) {
u64_stats_update_begin(&ring->syncp);
ring->stats.frag_alloc_err++;
u64_stats_update_end(&ring->syncp);
hns3_rl_err(ring_to_netdev(ring),
"failed to allocate rx frag\n");
goto out;
}
desc_cb->reuse_flag = 1;
memcpy(frag, desc_cb->buf + frag_offset, frag_size);
skb_add_rx_frag(skb, i, virt_to_page(frag),
offset_in_page(frag), frag_size, frag_size);
u64_stats_update_begin(&ring->syncp);
ring->stats.frag_alloc++;
u64_stats_update_end(&ring->syncp);
return;
} }
out: out:
...@@ -4620,6 +4642,7 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, ...@@ -4620,6 +4642,7 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
ring = &priv->ring[q->tqp_index + queue_num]; ring = &priv->ring[q->tqp_index + queue_num];
desc_num = priv->ae_handle->kinfo.num_rx_desc; desc_num = priv->ae_handle->kinfo.num_rx_desc;
ring->queue_index = q->tqp_index; ring->queue_index = q->tqp_index;
ring->rx_copybreak = priv->rx_copybreak;
} }
hnae3_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type); hnae3_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type);
......
...@@ -427,6 +427,8 @@ struct ring_stats { ...@@ -427,6 +427,8 @@ struct ring_stats {
u64 csum_complete; u64 csum_complete;
u64 rx_multicast; u64 rx_multicast;
u64 non_reuse_pg; u64 non_reuse_pg;
u64 frag_alloc_err;
u64 frag_alloc;
}; };
__le16 csum; __le16 csum;
}; };
...@@ -478,6 +480,7 @@ struct hns3_enet_ring { ...@@ -478,6 +480,7 @@ struct hns3_enet_ring {
/* for Rx ring */ /* for Rx ring */
struct { struct {
u32 pull_len; /* memcpy len for current rx packet */ u32 pull_len; /* memcpy len for current rx packet */
u32 rx_copybreak;
u32 frag_num; u32 frag_num;
/* first buffer address for current packet */ /* first buffer address for current packet */
unsigned char *va; unsigned char *va;
...@@ -569,6 +572,7 @@ struct hns3_nic_priv { ...@@ -569,6 +572,7 @@ struct hns3_nic_priv {
struct hns3_enet_coalesce tx_coal; struct hns3_enet_coalesce tx_coal;
struct hns3_enet_coalesce rx_coal; struct hns3_enet_coalesce rx_coal;
u32 tx_copybreak; u32 tx_copybreak;
u32 rx_copybreak;
}; };
union l3_hdr_info { union l3_hdr_info {
......
...@@ -71,6 +71,8 @@ static const struct hns3_stats hns3_rxq_stats[] = { ...@@ -71,6 +71,8 @@ static const struct hns3_stats hns3_rxq_stats[] = {
HNS3_TQP_STAT("csum_complete", csum_complete), HNS3_TQP_STAT("csum_complete", csum_complete),
HNS3_TQP_STAT("multicast", rx_multicast), HNS3_TQP_STAT("multicast", rx_multicast),
HNS3_TQP_STAT("non_reuse_pg", non_reuse_pg), HNS3_TQP_STAT("non_reuse_pg", non_reuse_pg),
HNS3_TQP_STAT("frag_alloc_err", frag_alloc_err),
HNS3_TQP_STAT("frag_alloc", frag_alloc),
}; };
#define HNS3_PRIV_FLAGS_LEN ARRAY_SIZE(hns3_priv_flags) #define HNS3_PRIV_FLAGS_LEN ARRAY_SIZE(hns3_priv_flags)
...@@ -1610,6 +1612,9 @@ static int hns3_get_tunable(struct net_device *netdev, ...@@ -1610,6 +1612,9 @@ static int hns3_get_tunable(struct net_device *netdev,
/* all the tx rings have the same tx_copybreak */ /* all the tx rings have the same tx_copybreak */
*(u32 *)data = priv->tx_copybreak; *(u32 *)data = priv->tx_copybreak;
break; break;
case ETHTOOL_RX_COPYBREAK:
*(u32 *)data = priv->rx_copybreak;
break;
default: default:
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
break; break;
...@@ -1633,6 +1638,13 @@ static int hns3_set_tunable(struct net_device *netdev, ...@@ -1633,6 +1638,13 @@ static int hns3_set_tunable(struct net_device *netdev,
for (i = 0; i < h->kinfo.num_tqps; i++) for (i = 0; i < h->kinfo.num_tqps; i++)
priv->ring[i].tx_copybreak = priv->tx_copybreak; priv->ring[i].tx_copybreak = priv->tx_copybreak;
break;
case ETHTOOL_RX_COPYBREAK:
priv->rx_copybreak = *(u32 *)data;
for (i = h->kinfo.num_tqps; i < h->kinfo.num_tqps * 2; i++)
priv->ring[i].rx_copybreak = priv->rx_copybreak;
break; break;
default: default:
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册