提交 df24cd4f 编写于 作者: David S. Miller

Merge branch 'mlx4-XDP-TX-improvements'

Tariq Toukan says:

====================
mlx4_en XDP TX improvements

This patchset contains performance improvements
to the XDP_TX use case in the mlx4 Eth driver.

Patch 1 is a simple change in a function parameter type.
Patch 2 replaces a call to a generic function with the
  relevant parts inlined.
Patch 3 moves the write of descriptors' constant values
  from data path to control path.

Series generated against net-next commit:
833e0e2f net: dst: move cpu inside ifdef to avoid compilation warning
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
...@@ -1752,6 +1752,7 @@ int mlx4_en_start_port(struct net_device *dev) ...@@ -1752,6 +1752,7 @@ int mlx4_en_start_port(struct net_device *dev)
mlx4_en_arm_cq(priv, cq); mlx4_en_arm_cq(priv, cq);
} else { } else {
mlx4_en_init_tx_xdp_ring_descs(priv, tx_ring);
mlx4_en_init_recycle_ring(priv, i); mlx4_en_init_recycle_ring(priv, i);
/* XDP TX CQ should never be armed */ /* XDP TX CQ should never be armed */
} }
......
...@@ -778,7 +778,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud ...@@ -778,7 +778,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
case XDP_PASS: case XDP_PASS:
break; break;
case XDP_TX: case XDP_TX:
if (likely(!mlx4_en_xmit_frame(ring, frags, dev, if (likely(!mlx4_en_xmit_frame(ring, frags, priv,
length, cq_ring, length, cq_ring,
&doorbell_pending))) { &doorbell_pending))) {
frags[0].page = NULL; frags[0].page = NULL;
......
...@@ -1085,13 +1085,35 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -1085,13 +1085,35 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \ #define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
/ 16) & 0x3f) / 16) & 0x3f)
/*
 * Pre-fill the per-slot values of an XDP TX ring that do not change from
 * one transmitted frame to the next.
 *
 * For each of the ring->size slots this writes the fixed tx_info
 * bookkeeping fields and the descriptor's lkey, fence_size and
 * srcrb_flags, so that these stores need not be repeated per frame.
 *
 * @priv: supplies ctrl_flags, copied into every descriptor's srcrb_flags.
 * @ring: TX ring whose tx_info[] entries and descriptor buffer are filled.
 */
void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
				    struct mlx4_en_tx_ring *ring)
{
	int slot = 0;

	while (slot < ring->size) {
		/* Descriptor for this slot sits (slot << LOG_TXBB_SIZE) bytes into buf. */
		struct mlx4_en_tx_desc *desc = ring->buf +
			(slot << LOG_TXBB_SIZE);
		struct mlx4_en_tx_info *info = &ring->tx_info[slot];

		/* Software-side bookkeeping. */
		info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
		info->nr_maps = 1;
		info->map0_byte_count = PAGE_SIZE;
		info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
		info->linear = 1;
		info->inl = 0;
		info->ts_requested = 0;

		/* Descriptor fields. */
		desc->data.lkey = ring->mr_key;
		desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
		desc->ctrl.srcrb_flags = priv->ctrl_flags;

		slot++;
	}
}
netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
struct mlx4_en_rx_alloc *frame, struct mlx4_en_rx_alloc *frame,
struct net_device *dev, unsigned int length, struct mlx4_en_priv *priv, unsigned int length,
int tx_ind, bool *doorbell_pending) int tx_ind, bool *doorbell_pending)
{ {
struct mlx4_en_priv *priv = netdev_priv(dev);
union mlx4_wqe_qpn_vlan qpn_vlan = {};
struct mlx4_en_tx_desc *tx_desc; struct mlx4_en_tx_desc *tx_desc;
struct mlx4_en_tx_info *tx_info; struct mlx4_en_tx_info *tx_info;
struct mlx4_wqe_data_seg *data; struct mlx4_wqe_data_seg *data;
...@@ -1123,25 +1145,16 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, ...@@ -1123,25 +1145,16 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
tx_info->page = frame->page; tx_info->page = frame->page;
frame->page = NULL; frame->page = NULL;
tx_info->map0_dma = dma; tx_info->map0_dma = dma;
tx_info->map0_byte_count = PAGE_SIZE;
tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
tx_info->ts_requested = 0;
tx_info->nr_maps = 1;
tx_info->linear = 1;
tx_info->inl = 0;
dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset, dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
length, PCI_DMA_TODEVICE); length, PCI_DMA_TODEVICE);
data->addr = cpu_to_be64(dma + frame->page_offset); data->addr = cpu_to_be64(dma + frame->page_offset);
data->lkey = ring->mr_key;
dma_wmb(); dma_wmb();
data->byte_count = cpu_to_be32(length); data->byte_count = cpu_to_be32(length);
/* tx completion can avoid cache line miss for common cases */ /* tx completion can avoid cache line miss for common cases */
tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
op_own = cpu_to_be32(MLX4_OPCODE_SEND) | op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
((ring->prod & ring->size) ? ((ring->prod & ring->size) ?
...@@ -1152,10 +1165,13 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, ...@@ -1152,10 +1165,13 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
ring->prod += MLX4_EN_XDP_TX_NRTXBB; ring->prod += MLX4_EN_XDP_TX_NRTXBB;
qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ; /* Ensure new descriptor hits memory
* before setting ownership of this descriptor to HW
*/
dma_wmb();
tx_desc->ctrl.owner_opcode = op_own;
ring->xmit_more++;
mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0,
op_own, false, false);
*doorbell_pending = true; *doorbell_pending = true;
return NETDEV_TX_OK; return NETDEV_TX_OK;
......
...@@ -693,7 +693,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, ...@@ -693,7 +693,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
struct mlx4_en_rx_alloc *frame, struct mlx4_en_rx_alloc *frame,
struct net_device *dev, unsigned int length, struct mlx4_en_priv *priv, unsigned int length,
int tx_ind, bool *doorbell_pending); int tx_ind, bool *doorbell_pending);
void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
...@@ -705,6 +705,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ...@@ -705,6 +705,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
int node, int queue_index); int node, int queue_index);
void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring **pring); struct mlx4_en_tx_ring **pring);
void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring);
int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring, struct mlx4_en_tx_ring *ring,
int cq, int user_prio); int cq, int user_prio);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册